images_from_link 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1898d9b69a6941bf657a8f397059d208e6a1551a
4
- data.tar.gz: aea71848eafa0c0b8df073dd82b4c079492716bd
3
+ metadata.gz: 522fc31690af948adc4b51612c4d25d38b0c12f3
4
+ data.tar.gz: b26d5ef7ab8aa05bc43d00cf202d0d07e000987b
5
5
  SHA512:
6
- metadata.gz: 523b977f91b97bd5e7cef2024eb217f97f2e9d416fe53f3fafed90a439686a9e2e9b64cad91f8b4b040d339403c1e3df32f7b979bfb47cd1c38d607381814e66
7
- data.tar.gz: f7b8cb0e6817a9d6888730c516c5ac3f94c90d59a539114b6fbcca6dff9c4d78c85bdfe76b9e14e92d26ba200c5dd6637a93f84c65d9ff8b64818224ead3d868
6
+ metadata.gz: fbcd60da05ca7a14e0cd8de84b42cdb6fbed090e90533c81cd871e6c7eac54990f17f90b89baff426a41392bf0ee97bad1e79873c2d1ded1beb80266a1ce316a
7
+ data.tar.gz: 358f80bb52a5129d91947eead0cd68dab285f569d487b7b3752f311c8440f9b7c4640b1ca5a6c52badd6239898e139944e9d88e5869a84345b3a40bd9a06af80
data/README.md CHANGED
@@ -28,7 +28,7 @@ ImagesFromLink.get_images('https://www.google.com')
28
28
  =>
29
29
  [
30
30
  "https://www.google.com/textinputassistant/tia.png",
31
- "https://www.google.com/images/nav_logo229.png",
31
+ "https://www.google.com/images/nav_logo229.png",
32
32
  "https://www.google.com/images/branding/googlelogo/1x/googlelogo_white_background_color_272x92dp.png"
33
33
  ]
34
34
  ```
@@ -0,0 +1,71 @@
1
+ module HandlerLink
2
+ FORMAT_IMG = ["jpg", "jpeg", "png", "gif"]
3
+
4
+ # gets the url, returns domain
5
+ def self.get_host_link(link)
6
+ uri = URI.parse(link)
7
+ "#{uri.scheme}://#{uri.host}"
8
+ end
9
+
10
+ def self.handler_links(array_links, link)
11
+ array_links.each do |url|
12
+ if url.include?("(/")
13
+ uri = get_host_link(link)
14
+
15
+ position = url.index("(")
16
+ url[position] += uri.to_s
17
+ end
18
+ end
19
+ end
20
+
21
+ # adds scheme if this href
22
+ def self.handler_prefix_link(host_link, link)
23
+ abort 'expect strings params' unless host_link.is_a?(String) || link.is_a?(String)
24
+
25
+ if link[0] == '/' && link[1] != '/'
26
+ host_link + link
27
+ elsif link[0..1] == '//'
28
+ uri = URI.parse(host_link)
29
+ "#{uri.scheme}:#{link}"
30
+ else
31
+ link
32
+ end
33
+ end
34
+
35
+ def self.remove_unless_symbols(array_images_links)
36
+ array_images_links.each do |image_url|
37
+ if (image_url[0..3] != "http" || image_url[0..3] != "www.") && image_url.include?("(")
38
+ position = image_url.index("(")
39
+ image_url.reverse!
40
+ position.times { image_url.chop! }
41
+ image_url.reverse!
42
+ image_url.delete!("(,;'')")
43
+ end
44
+ end
45
+ end
46
+
47
+ def self.remove_global_unless_symbols(array_images_links)
48
+ array_images_links.each { |link| link.delete!("(,;'')") }
49
+ end
50
+
51
+ # remove link if link not valid
52
+ def self.remove_unless_link(array_links)
53
+
54
+ array_links.each_with_index do |link, index|
55
+ array_links[index] = "" if link[0..3] != "http"
56
+
57
+ index_ending = nil
58
+
59
+ FORMAT_IMG.each do |i|
60
+ index_ending = i if link.include?(i)
61
+ end
62
+
63
+ unless index_ending == nil
64
+ position = link.index(index_ending)
65
+ array_links[index] = "" if (link[position + index_ending.size] =~ /[a-z]/)
66
+ end
67
+
68
+ end
69
+ array_links.delete("")
70
+ end
71
+ end
@@ -1,9 +1,10 @@
1
1
  require "images_from_link/version"
2
- require 'HandlerLink'
3
- require 'ImagesLink'
4
2
 
5
3
  module ImagesFromLink
6
- # выводит каритнки по переданному урлу
4
+ require_relative 'handler_link'
5
+ require_relative 'images_link'
6
+
7
+ # extract images from got url
7
8
  def self.get_images(link)
8
9
  images_link = ImagesLink.new(link)
9
10
  images_link.get_images_from_url
@@ -1,3 +1,3 @@
1
1
  module ImagesFromLink
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
@@ -1,67 +1,78 @@
1
1
  require 'httparty'
2
2
  require 'nokogiri'
3
3
  require 'uri'
4
- require_relative 'HandlerLink'
4
+ require_relative 'handler_link'
5
5
 
6
6
  class ImagesLink
7
7
 
8
+ # expect valid url
8
9
  def initialize(link)
9
10
  @link = link
10
11
 
11
12
  begin
12
13
  @response = HTTParty.get(@link)
13
14
  rescue Errno::ECONNREFUSED => e
14
- puts "Неправильный урл"
15
+ puts "not valid url"
15
16
  abort e.message
16
17
  end
17
18
 
18
19
  @doc = Nokogiri::HTML(@response.body)
19
20
  @doc.search('//noscript').each { |node| node.remove } # убираю мешающие ноды
20
- @arra_links = [] # сдесь будут храниться все урлы картинок
21
- @handler_link = HandlerLink.new # обработчик урлов
21
+ @links = [] # сдесь будут храниться все урлы картинок
22
+ @handler_link = HandlerLink # обработчик урлов
22
23
  @link_host_name = @handler_link.get_host_link(@link) # беру имя домена
23
24
  end
24
25
 
25
- # возвращает, все найденные, урлы картинок
26
+ # returns all found images url
26
27
  def get_images_from_url
27
- @arra_links = (get_url_with_attr_img_link + get_url_with_other_attr).uniq
28
- @handler_link.remove_global_unless_symbols(@arra_links)
29
- @arra_links
28
+ @links = (images_from_img_tag + images_from_link_tag + images_from_extension).uniq
29
+ @handler_link.remove_global_unless_symbols(@links)
30
+ @links
30
31
  end
31
32
 
32
- def get_url_with_attr_img_link
33
- img_arra = []
34
- # пробегаю по тегам img (meta og:images...), хватаю урл и закидываю в @arra_links
33
+ # returns all images url with tags img['src']
34
+ def images_from_img_tag
35
+ img_array = []
36
+ # пробегаю по тегам img (meta og:images...), хватаю урл и закидываю в @links
35
37
  @doc.xpath('//img').each do |img|
36
38
  if img['src'] != nil
37
- arra = [img['src'].to_s]
38
- src = @handler_link.remove_unless_symbols(arra)
39
+ array = [img['src'].to_s]
40
+ src = @handler_link.remove_unless_symbols(array)
39
41
  got_link = @handler_link.handler_prefix_link(@link_host_name, src.to_s.delete!("[\"]"))
40
42
 
41
- img_arra << got_link
43
+ img_array << got_link
42
44
  end
43
45
  end
44
46
 
47
+ img_array.uniq!
48
+ @handler_link.remove_unless_link(img_array)
49
+ img_array
50
+ end
51
+
52
+ # returns all images url with tags link['href']
53
+ def images_from_link_tag
54
+ img_array = []
45
55
  @doc.xpath('//link').each do |link|
46
56
  if link['href'] != nil && link['type'] != nil
47
57
  if link['type'].include?("image")
48
58
  got_link = @handler_link.handler_prefix_link(@link_host_name, link['href'])
49
- img_arra << got_link
59
+ img_array << got_link
50
60
  end
51
61
  end
52
62
  end
53
63
 
54
- @arra_links.uniq!
55
- @handler_link.remove_unless_link(@arra_links)
56
- img_arra
64
+ img_array.uniq!
65
+ @handler_link.remove_unless_link(img_array)
66
+ img_array
57
67
  end
58
68
 
59
- def get_url_with_other_attr
60
- # нахожу все урлы с jpg, png, gif... и закидываю в @arra_links
69
+ # returns all images url with jpg, png, gif...
70
+ def images_from_extension
71
+ # нахожу все урлы с jpg, png, gif...
61
72
  @images_links = URI.extract(@doc.to_s.encode("UTF-16be", :invalid => :replace, :replace => "?").encode('UTF-8')).select { |l| l[/\.(?:gif|png|jpe?g)\b/] }
62
73
  @handler_link.handler_links(@images_links, @link) # обрабатываю урлы
63
74
  @handler_link.remove_unless_symbols(@images_links)
64
75
  @handler_link.remove_unless_link(@images_links)
65
- @images_links
76
+ @images_links.uniq
66
77
  end
67
78
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: images_from_link
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - zerocool
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-03-18 00:00:00.000000000 Z
11
+ date: 2017-03-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -96,10 +96,10 @@ files:
96
96
  - bin/console
97
97
  - bin/setup
98
98
  - images_from_link.gemspec
99
- - lib/HandlerLink.rb
100
- - lib/ImagesLink.rb
99
+ - lib/handler_link.rb
101
100
  - lib/images_from_link.rb
102
101
  - lib/images_from_link/version.rb
102
+ - lib/images_link.rb
103
103
  homepage: https://github.com/exwarvlad/images_from_link
104
104
  licenses:
105
105
  - MIT
@@ -1,70 +0,0 @@
1
- class HandlerLink
2
- FORMAT_IMG = ["jpg", "jpeg", "png", "gif"]
3
-
4
- # получает урл - отдает домен
5
- def get_host_link(link)
6
- uri = URI.parse(link)
7
- "#{uri.scheme}://" + uri.host
8
- end
9
-
10
- def handler_links(arra_links, link)
11
- arra_links.each do |url|
12
- if url.include?("(/")
13
- uri = get_host_link(link)
14
-
15
- position = url.index("(")
16
- url[position] += uri.to_s
17
- end
18
- end
19
- end
20
-
21
- # добавляет scheme, если это href
22
- def handler_prefix_link(host_link, link)
23
- abort 'в качестве аргументов передайте строки' unless host_link.is_a?(String) || link.is_a?(String)
24
-
25
- if link[0] == '/' && link[1] != '/'
26
- host_link + link
27
- elsif link[0..1] == '//'
28
- uri = URI.parse(host_link)
29
- "#{uri.scheme}:#{link}"
30
- else
31
- link
32
- end
33
- end
34
-
35
- def remove_unless_symbols(arra_images_links)
36
- arra_images_links.each do |image_url|
37
- if (image_url[0..3] != "http" || image_url[0..3] != "www.") && image_url.include?("(")
38
- position = image_url.index("(")
39
- image_url.reverse!
40
- position.times { image_url.chop! }
41
- image_url.reverse!
42
- image_url.delete!("(,;'')")
43
- end
44
- end
45
- end
46
-
47
- def remove_global_unless_symbols(arra_images_links)
48
- arra_images_links.each { |link| link.delete!("(,;'')") }
49
- end
50
-
51
- def remove_unless_link(arra_link)
52
-
53
- arra_link.each_with_index do |link, index|
54
- arra_link[index] = "" if link[0..3] != "http"
55
-
56
- index_ending = nil
57
-
58
- FORMAT_IMG.each do |i|
59
- index_ending = i if link.include?(i)
60
- end
61
-
62
- unless index_ending == nil
63
- position = link.index(index_ending)
64
- arra_link[index] = "" if (link[position + index_ending.size] =~ /[a-z]/)
65
- end
66
-
67
- end
68
- arra_link.delete("")
69
- end
70
- end