images_from_link 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1898d9b69a6941bf657a8f397059d208e6a1551a
4
- data.tar.gz: aea71848eafa0c0b8df073dd82b4c079492716bd
3
+ metadata.gz: 522fc31690af948adc4b51612c4d25d38b0c12f3
4
+ data.tar.gz: b26d5ef7ab8aa05bc43d00cf202d0d07e000987b
5
5
  SHA512:
6
- metadata.gz: 523b977f91b97bd5e7cef2024eb217f97f2e9d416fe53f3fafed90a439686a9e2e9b64cad91f8b4b040d339403c1e3df32f7b979bfb47cd1c38d607381814e66
7
- data.tar.gz: f7b8cb0e6817a9d6888730c516c5ac3f94c90d59a539114b6fbcca6dff9c4d78c85bdfe76b9e14e92d26ba200c5dd6637a93f84c65d9ff8b64818224ead3d868
6
+ metadata.gz: fbcd60da05ca7a14e0cd8de84b42cdb6fbed090e90533c81cd871e6c7eac54990f17f90b89baff426a41392bf0ee97bad1e79873c2d1ded1beb80266a1ce316a
7
+ data.tar.gz: 358f80bb52a5129d91947eead0cd68dab285f569d487b7b3752f311c8440f9b7c4640b1ca5a6c52badd6239898e139944e9d88e5869a84345b3a40bd9a06af80
data/README.md CHANGED
@@ -28,7 +28,7 @@ ImagesFromLink.get_images('https://www.google.com')
28
28
  =>
29
29
  [
30
30
  "https://www.google.com/textinputassistant/tia.png",
31
- "https://www.google.com/images/nav_logo229.png",
31
+ "https://www.google.com/images/nav_logo229.png",
32
32
  "https://www.google.com/images/branding/googlelogo/1x/googlelogo_white_background_color_272x92dp.png"
33
33
  ]
34
34
  ```
data/lib/handler_link.rb ADDED
@@ -0,0 +1,71 @@
1
+ module HandlerLink
2
+ FORMAT_IMG = ["jpg", "jpeg", "png", "gif"]
3
+
4
+ # gets the url, returns domain
5
+ def self.get_host_link(link)
6
+ uri = URI.parse(link)
7
+ "#{uri.scheme}://#{uri.host}"
8
+ end
9
+
10
+ def self.handler_links(array_links, link)
11
+ array_links.each do |url|
12
+ if url.include?("(/")
13
+ uri = get_host_link(link)
14
+
15
+ position = url.index("(")
16
+ url[position] += uri.to_s
17
+ end
18
+ end
19
+ end
20
+
21
+ # adds scheme if this href
22
+ def self.handler_prefix_link(host_link, link)
23
+ abort 'expect strings params' unless host_link.is_a?(String) || link.is_a?(String)
24
+
25
+ if link[0] == '/' && link[1] != '/'
26
+ host_link + link
27
+ elsif link[0..1] == '//'
28
+ uri = URI.parse(host_link)
29
+ "#{uri.scheme}:#{link}"
30
+ else
31
+ link
32
+ end
33
+ end
34
+
35
+ def self.remove_unless_symbols(array_images_links)
36
+ array_images_links.each do |image_url|
37
+ if (image_url[0..3] != "http" || image_url[0..3] != "www.") && image_url.include?("(")
38
+ position = image_url.index("(")
39
+ image_url.reverse!
40
+ position.times { image_url.chop! }
41
+ image_url.reverse!
42
+ image_url.delete!("(,;'')")
43
+ end
44
+ end
45
+ end
46
+
47
+ def self.remove_global_unless_symbols(array_images_links)
48
+ array_images_links.each { |link| link.delete!("(,;'')") }
49
+ end
50
+
51
+ # remove link if link not valid
52
+ def self.remove_unless_link(array_links)
53
+
54
+ array_links.each_with_index do |link, index|
55
+ array_links[index] = "" if link[0..3] != "http"
56
+
57
+ index_ending = nil
58
+
59
+ FORMAT_IMG.each do |i|
60
+ index_ending = i if link.include?(i)
61
+ end
62
+
63
+ unless index_ending == nil
64
+ position = link.index(index_ending)
65
+ array_links[index] = "" if (link[position + index_ending.size] =~ /[a-z]/)
66
+ end
67
+
68
+ end
69
+ array_links.delete("")
70
+ end
71
+ end
data/lib/images_from_link.rb CHANGED
@@ -1,9 +1,10 @@
1
1
  require "images_from_link/version"
2
- require 'HandlerLink'
3
- require 'ImagesLink'
4
2
 
5
3
  module ImagesFromLink
6
- # выводит каритнки по переданному урлу
4
+ require_relative 'handler_link'
5
+ require_relative 'images_link'
6
+
7
+ # extract images from got url
7
8
  def self.get_images(link)
8
9
  images_link = ImagesLink.new(link)
9
10
  images_link.get_images_from_url
data/lib/images_from_link/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module ImagesFromLink
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
data/lib/{ImagesLink.rb → images_link.rb} RENAMED
@@ -1,67 +1,78 @@
1
1
  require 'httparty'
2
2
  require 'nokogiri'
3
3
  require 'uri'
4
- require_relative 'HandlerLink'
4
+ require_relative 'handler_link'
5
5
 
6
6
  class ImagesLink
7
7
 
8
+ # expect valid url
8
9
  def initialize(link)
9
10
  @link = link
10
11
 
11
12
  begin
12
13
  @response = HTTParty.get(@link)
13
14
  rescue Errno::ECONNREFUSED => e
14
- puts "Неправильный урл"
15
+ puts "not valid url"
15
16
  abort e.message
16
17
  end
17
18
 
18
19
  @doc = Nokogiri::HTML(@response.body)
19
20
  @doc.search('//noscript').each { |node| node.remove } # убираю мешающие ноды
20
- @arra_links = [] # сдесь будут храниться все урлы картинок
21
- @handler_link = HandlerLink.new # обработчик урлов
21
+ @links = [] # сдесь будут храниться все урлы картинок
22
+ @handler_link = HandlerLink # обработчик урлов
22
23
  @link_host_name = @handler_link.get_host_link(@link) # беру имя домена
23
24
  end
24
25
 
25
- # возвращает, все найденные, урлы картинок
26
+ # returns all found images url
26
27
  def get_images_from_url
27
- @arra_links = (get_url_with_attr_img_link + get_url_with_other_attr).uniq
28
- @handler_link.remove_global_unless_symbols(@arra_links)
29
- @arra_links
28
+ @links = (images_from_img_tag + images_from_link_tag + images_from_extension).uniq
29
+ @handler_link.remove_global_unless_symbols(@links)
30
+ @links
30
31
  end
31
32
 
32
- def get_url_with_attr_img_link
33
- img_arra = []
34
- # пробегаю по тегам img (meta og:images...), хватаю урл и закидываю в @arra_links
33
+ # returns all images url with tags img['src']
34
+ def images_from_img_tag
35
+ img_array = []
36
+ # пробегаю по тегам img (meta og:images...), хватаю урл и закидываю в @links
35
37
  @doc.xpath('//img').each do |img|
36
38
  if img['src'] != nil
37
- arra = [img['src'].to_s]
38
- src = @handler_link.remove_unless_symbols(arra)
39
+ array = [img['src'].to_s]
40
+ src = @handler_link.remove_unless_symbols(array)
39
41
  got_link = @handler_link.handler_prefix_link(@link_host_name, src.to_s.delete!("[\"]"))
40
42
 
41
- img_arra << got_link
43
+ img_array << got_link
42
44
  end
43
45
  end
44
46
 
47
+ img_array.uniq!
48
+ @handler_link.remove_unless_link(img_array)
49
+ img_array
50
+ end
51
+
52
+ # returns all images url with tags link['href']
53
+ def images_from_link_tag
54
+ img_array = []
45
55
  @doc.xpath('//link').each do |link|
46
56
  if link['href'] != nil && link['type'] != nil
47
57
  if link['type'].include?("image")
48
58
  got_link = @handler_link.handler_prefix_link(@link_host_name, link['href'])
49
- img_arra << got_link
59
+ img_array << got_link
50
60
  end
51
61
  end
52
62
  end
53
63
 
54
- @arra_links.uniq!
55
- @handler_link.remove_unless_link(@arra_links)
56
- img_arra
64
+ img_array.uniq!
65
+ @handler_link.remove_unless_link(img_array)
66
+ img_array
57
67
  end
58
68
 
59
- def get_url_with_other_attr
60
- # нахожу все урлы с jpg, png, gif... и закидываю в @arra_links
69
+ # returns all images url with jpg, png, gif...
70
+ def images_from_extension
71
+ # нахожу все урлы с jpg, png, gif...
61
72
  @images_links = URI.extract(@doc.to_s.encode("UTF-16be", :invalid => :replace, :replace => "?").encode('UTF-8')).select { |l| l[/\.(?:gif|png|jpe?g)\b/] }
62
73
  @handler_link.handler_links(@images_links, @link) # обрабатываю урлы
63
74
  @handler_link.remove_unless_symbols(@images_links)
64
75
  @handler_link.remove_unless_link(@images_links)
65
- @images_links
76
+ @images_links.uniq
66
77
  end
67
78
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: images_from_link
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - zerocool
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-03-18 00:00:00.000000000 Z
11
+ date: 2017-03-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -96,10 +96,10 @@ files:
96
96
  - bin/console
97
97
  - bin/setup
98
98
  - images_from_link.gemspec
99
- - lib/HandlerLink.rb
100
- - lib/ImagesLink.rb
99
+ - lib/handler_link.rb
101
100
  - lib/images_from_link.rb
102
101
  - lib/images_from_link/version.rb
102
+ - lib/images_link.rb
103
103
  homepage: https://github.com/exwarvlad/images_from_link
104
104
  licenses:
105
105
  - MIT
data/lib/HandlerLink.rb DELETED
@@ -1,70 +0,0 @@
1
- class HandlerLink
2
- FORMAT_IMG = ["jpg", "jpeg", "png", "gif"]
3
-
4
- # получает урл - отдает домен
5
- def get_host_link(link)
6
- uri = URI.parse(link)
7
- "#{uri.scheme}://" + uri.host
8
- end
9
-
10
- def handler_links(arra_links, link)
11
- arra_links.each do |url|
12
- if url.include?("(/")
13
- uri = get_host_link(link)
14
-
15
- position = url.index("(")
16
- url[position] += uri.to_s
17
- end
18
- end
19
- end
20
-
21
- # добавляет scheme, если это href
22
- def handler_prefix_link(host_link, link)
23
- abort 'в качестве аргументов передайте строки' unless host_link.is_a?(String) || link.is_a?(String)
24
-
25
- if link[0] == '/' && link[1] != '/'
26
- host_link + link
27
- elsif link[0..1] == '//'
28
- uri = URI.parse(host_link)
29
- "#{uri.scheme}:#{link}"
30
- else
31
- link
32
- end
33
- end
34
-
35
- def remove_unless_symbols(arra_images_links)
36
- arra_images_links.each do |image_url|
37
- if (image_url[0..3] != "http" || image_url[0..3] != "www.") && image_url.include?("(")
38
- position = image_url.index("(")
39
- image_url.reverse!
40
- position.times { image_url.chop! }
41
- image_url.reverse!
42
- image_url.delete!("(,;'')")
43
- end
44
- end
45
- end
46
-
47
- def remove_global_unless_symbols(arra_images_links)
48
- arra_images_links.each { |link| link.delete!("(,;'')") }
49
- end
50
-
51
- def remove_unless_link(arra_link)
52
-
53
- arra_link.each_with_index do |link, index|
54
- arra_link[index] = "" if link[0..3] != "http"
55
-
56
- index_ending = nil
57
-
58
- FORMAT_IMG.each do |i|
59
- index_ending = i if link.include?(i)
60
- end
61
-
62
- unless index_ending == nil
63
- position = link.index(index_ending)
64
- arra_link[index] = "" if (link[position + index_ending.size] =~ /[a-z]/)
65
- end
66
-
67
- end
68
- arra_link.delete("")
69
- end
70
- end