fly_parser 0.0.23 → 0.0.24

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35c2945cf908943eea6512e24e7b39106cf9ee9b
4
- data.tar.gz: c8e48aa58849e9cc2696602f22a2b4a058fab8b3
3
+ metadata.gz: d7390fe6d8dd04f001c9d1df1d4876e828585438
4
+ data.tar.gz: 4baee214d519608b5ebf3cf891b16aea7acd9e2d
5
5
  SHA512:
6
- metadata.gz: 1bc47c495a712b29615ca23935f61011db8607bc48a2ebe4131da250f4fcada371dec4540a4dc5e43f93c394ca5b043787fcfa4bcbf336cf4c77f942cba18a79
7
- data.tar.gz: a62844515966e3af62cdad8ec7c50a57b68393def5f2f5049861cbc1bfd92b23f373712be63a8b786ef13b88d32b12260ca9242fad92518d4ab044d375284ba4
6
+ metadata.gz: 58133b0caf6bf9ec5b2714a4d29797c2156972f3701785dedf80a4eb9e484633db59a1807b07a9212b06ce687034d738b8b2cc86069ee3c1d3c432372cc82b7e
7
+ data.tar.gz: 216f6d754dd7ed4d8697885b2b1bf60b474d90763c5d7767daae999ce254384da889440d370c294947e572633d0accf9c697d74d0688eb606a80cf6995f1cbe0
@@ -116,8 +116,3 @@ module Parser
116
116
  end
117
117
 
118
118
  end
119
-
120
- # astrology
121
- # astro = Parser::Astrology.new('http://moj-znak-zodiaka.ru/')
122
- # astro.parse_in("Гороскоп на сентябрь 2014","big")
123
- # astro.parse_all
@@ -42,5 +42,13 @@ module Parser
42
42
  title: source['copyright_title']
43
43
  }
44
44
  end
45
+
46
+ def click(link)
47
+ agent = Mechanize.new
48
+ agent.ignore_bad_chunking = true
49
+ agent.pluggable_parser.default = Mechanize::Page
50
+ agent.click(link)
51
+ end
52
+
45
53
  end
46
- end
54
+ end
@@ -27,6 +27,10 @@ module Enable
27
27
  lambda {|item| item["parser"] = Parser::NewsAZ.new(item["url"], source: source)}
28
28
  end
29
29
 
30
+ def news_kz(source)
31
+ lambda {|item| item["parser"] = Parser::NewsKZ.new(item["url"], source: source)}
32
+ end
33
+
30
34
  def method_missing(meth, *args)
31
35
  prefix = "enable_"
32
36
  meth = meth.to_s
@@ -42,4 +46,4 @@ module Enable
42
46
  def iterate_sources(source, block)
43
47
  source["items"].each(&block)
44
48
  end
45
- end
49
+ end
@@ -0,0 +1,42 @@
1
+ module Parser
2
+ class NewsKZ < Base
3
+ def initialize(source, options = {})
4
+ @delay = 2
5
+ super
6
+ end
7
+
8
+ def parse_all
9
+ links = @source.search("li.c__news_item a:first")
10
+
11
+ links.map do |link|
12
+ page = click(link)
13
+ begin
14
+ title = page.search(".c__article_caption").text()
15
+
16
+ content_wrapper = page.search('.c__article_text')
17
+
18
+ image_wrapper = content_wrapper.search('.wp-caption img').first
19
+ next unless image_wrapper
20
+ poster_image = image_wrapper.attributes['src'].value
21
+
22
+ content_wrapper.search('.wp-caption').remove()
23
+ content_wrapper.search('.c__article_mistake').remove()
24
+ content_wrapper.search('p[style="display:none"]').remove()
25
+ content_wrapper.search("a").remove()
26
+ content_wrapper.search("span:contains(Копирование)").remove()
27
+
28
+ full_desc = content_wrapper.to_html
29
+ full_desc.gsub!(/<iframe.*><\/iframe>/, '')
30
+ copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
31
+ content = full_desc + copyright
32
+
33
+ {title: title, content: content, poster_image: poster_image}
34
+
35
+ rescue Exception => e
36
+ puts e.message
37
+ next
38
+ end
39
+ end.compact
40
+ end
41
+ end
42
+ end
@@ -1,3 +1,3 @@
1
1
  module Parser
2
- VERSION = "0.0.23"
2
+ VERSION = "0.0.24"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fly_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.23
4
+ version: 0.0.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ruslan Korolev
@@ -135,11 +135,11 @@ files:
135
135
  - lib/fly_parser/enable_source.rb
136
136
  - lib/fly_parser/logo.txt
137
137
  - lib/fly_parser/mechanize_fix.rb
138
- - lib/fly_parser/sources/astrology.rb
139
138
  - lib/fly_parser/sources/exercise.rb
140
139
  - lib/fly_parser/sources/fitness.rb
141
140
  - lib/fly_parser/sources/news-az.rb
142
141
  - lib/fly_parser/sources/news-fr.rb
142
+ - lib/fly_parser/sources/news-kz.rb
143
143
  - lib/fly_parser/sources/news-nl.rb
144
144
  - lib/fly_parser/sources/news.rb
145
145
  - lib/fly_parser/sources/sport.rb
@@ -170,3 +170,4 @@ signing_key:
170
170
  specification_version: 4
171
171
  summary: Fly parser
172
172
  test_files: []
173
+ has_rdoc:
@@ -1,67 +0,0 @@
1
- require 'hashie'
2
- module Parser
3
- class Astrology
4
-
5
- def initialize(source)
6
- @zodiacs = ['Овен','Телец','Близнецы','Рак','Лев','Дева','Весы','Скорпион','Стрелец','Козерог','Водолей','Рыбы']
7
- @source = Parser.connect(source)
8
-
9
- small_titles = ["Гороскоп на сегодня", "Гороскоп на завтра", "Гороскоп на неделю"]
10
- big_titles = ["Гороскопы на месяц", "Гороскоп на 2014 год", "Гороскоп на 2014 год зеленой Лошади", "Гороскоп на сентябрь 2014"]
11
- @titles = Hashie::Mash.new
12
- @titles.small = small_titles
13
- @titles.big = big_titles
14
- end
15
-
16
- def parse_in(text = "Гороскоп на сегодня", date = 'small')
17
- @text = text
18
- @date = date
19
- parse_content
20
- end
21
-
22
- def parse_content
23
- zodiac_links.map do |item|
24
- link = item.link
25
- zodiac = item.zodiac
26
- @page = Parser.http(link.value)
27
- content = (@date == 'small' ? parse_small : parse_big)
28
- result = Hashie::Mash.new
29
- result.zodiac = zodiac
30
- result.content = content
31
- result
32
- end
33
- end
34
-
35
- def parse_all
36
- small_content = @titles.small.map { |title| {title: title, content: parse_in(title,"small")} }
37
- big_content = @titles.big.map { |title| {title: title, content: parse_in(title,"big")} }
38
-
39
- small_content.concat big_content
40
- end
41
-
42
- private
43
-
44
- def current_page
45
- @source.link_with(:text => @text).click
46
- end
47
-
48
- def zodiac_links
49
- @zodiacs.map do |z|
50
- result = Hashie::Mash.new
51
- result.link = current_page.search("a:contains('#{z}')")[0].attributes["href"]
52
- result.zodiac = z
53
- result
54
- end
55
- end
56
-
57
- def parse_small
58
- @page.css('#main').children().reject do |el|
59
- (el.attributes['class'].value == 'lp50' || el.attributes['class'].value == 'rp50' || el.attributes['class'].value == "space" if el.attributes['class'] != nil) || ['img','br','b','h1'].include?(el.name) || ["\n","\n\n"].include?(el.text)
60
- end.join
61
- end
62
-
63
- def parse_big
64
- @page.css('#main .lp50').text()
65
- end
66
- end
67
- end