fly_parser 0.0.23 → 0.0.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/fly_parser.rb +0 -5
- data/lib/fly_parser/base.rb +9 -1
- data/lib/fly_parser/enable_source.rb +5 -1
- data/lib/fly_parser/sources/news-kz.rb +42 -0
- data/lib/fly_parser/version.rb +1 -1
- metadata +3 -2
- data/lib/fly_parser/sources/astrology.rb +0 -67
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d7390fe6d8dd04f001c9d1df1d4876e828585438
|
4
|
+
data.tar.gz: 4baee214d519608b5ebf3cf891b16aea7acd9e2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 58133b0caf6bf9ec5b2714a4d29797c2156972f3701785dedf80a4eb9e484633db59a1807b07a9212b06ce687034d738b8b2cc86069ee3c1d3c432372cc82b7e
|
7
|
+
data.tar.gz: 216f6d754dd7ed4d8697885b2b1bf60b474d90763c5d7767daae999ce254384da889440d370c294947e572633d0accf9c697d74d0688eb606a80cf6995f1cbe0
|
data/lib/fly_parser.rb
CHANGED
data/lib/fly_parser/base.rb
CHANGED
@@ -27,6 +27,10 @@ module Enable
|
|
27
27
|
lambda {|item| item["parser"] = Parser::NewsAZ.new(item["url"], source: source)}
|
28
28
|
end
|
29
29
|
|
30
|
+
# Returns a one-argument lambda that builds a Parser::NewsKZ from the item's
# "url" (passing +source+ through as the :source option) and stores it under
# the item's "parser" key.
def news_kz(source)
  lambda do |item|
    parser = Parser::NewsKZ.new(item["url"], source: source)
    item["parser"] = parser
  end
end
|
33
|
+
|
30
34
|
def method_missing(meth, *args)
|
31
35
|
prefix = "enable_"
|
32
36
|
meth = meth.to_s
|
@@ -42,4 +46,4 @@ module Enable
|
|
42
46
|
# Calls +block+ once for every entry stored under source["items"].
# +block+ arrives as an ordinary positional argument (a proc/lambda) and is
# converted to a block for the iteration. Returns whatever #each returns.
def iterate_sources(source, block)
  items = source["items"]
  items.each(&block)
end
|
45
|
-
end
|
49
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Parser
  # Scraper for the NewsKZ listing page: follows every news-item link found in
  # @source and converts each article page into a plain hash.
  class NewsKZ < Base
    # Selectors of the nodes stripped from the article body, in removal order.
    # Embedded iframes are removed as DOM nodes too, so text sitting between
    # two iframes can no longer be swallowed by a greedy regular expression.
    REMOVED_SELECTORS = [
      '.wp-caption',
      '.c__article_mistake',
      'p[style="display:none"]',
      'a',
      'span:contains(Копирование)',
      'iframe'
    ].freeze

    # Both arguments are forwarded unchanged to Base#initialize.
    def initialize(source, options = {})
      # Assigned before +super+; presumably Base reads @delay during setup
      # (TODO confirm against Base).
      @delay = 2
      super
    end

    # Visits every article linked from the listing.
    #
    # Returns an Array of hashes with the keys :title, :content and
    # :poster_image. Articles without a caption image, and articles whose
    # extraction raised, are left out of the result.
    def parse_all
      links = @source.search("li.c__news_item a:first")

      # The click happens outside extract_article's rescue on purpose:
      # navigation failures keep propagating to the caller.
      links.map { |link| extract_article(click(link)) }.compact
    end

    private

    # Builds the article hash for one fetched page, or returns nil when the
    # page has no caption image or anything in the extraction raises.
    def extract_article(page)
      title = page.search(".c__article_caption").text
      body = page.search('.c__article_text')

      poster = body.search('.wp-caption img').first
      return unless poster
      poster_image = poster.attributes['src'].value

      REMOVED_SELECTORS.each { |selector| body.search(selector).remove }

      copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
      content = body.to_html + copyright

      {title: title, content: content, poster_image: poster_image}
    rescue StandardError => e
      # StandardError only: Interrupt, SystemExit and NoMemoryError must not
      # be swallowed by a best-effort scraper.
      puts e.message
      nil
    end
  end
end
|
data/lib/fly_parser/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fly_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.23
|
4
|
+
version: 0.0.24
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ruslan Korolev
|
@@ -135,11 +135,11 @@ files:
|
|
135
135
|
- lib/fly_parser/enable_source.rb
|
136
136
|
- lib/fly_parser/logo.txt
|
137
137
|
- lib/fly_parser/mechanize_fix.rb
|
138
|
-
- lib/fly_parser/sources/astrology.rb
|
139
138
|
- lib/fly_parser/sources/exercise.rb
|
140
139
|
- lib/fly_parser/sources/fitness.rb
|
141
140
|
- lib/fly_parser/sources/news-az.rb
|
142
141
|
- lib/fly_parser/sources/news-fr.rb
|
142
|
+
- lib/fly_parser/sources/news-kz.rb
|
143
143
|
- lib/fly_parser/sources/news-nl.rb
|
144
144
|
- lib/fly_parser/sources/news.rb
|
145
145
|
- lib/fly_parser/sources/sport.rb
|
@@ -170,3 +170,4 @@ signing_key:
|
|
170
170
|
specification_version: 4
|
171
171
|
summary: Fly parser
|
172
172
|
test_files: []
|
173
|
+
has_rdoc:
|
@@ -1,67 +0,0 @@
|
|
1
|
-
require 'hashie'
|
2
|
-
module Parser
  # Horoscope scraper. For each configured section title it opens the link
  # with that text on the source page, follows the link of every zodiac sign
  # found there and extracts the horoscope text.
  class Astrology
    ZODIACS = ['Овен','Телец','Близнецы','Рак','Лев','Дева','Весы','Скорпион','Стрелец','Козерог','Водолей','Рыбы'].freeze
    SMALL_TITLES = ["Гороскоп на сегодня", "Гороскоп на завтра", "Гороскоп на неделю"].freeze
    BIG_TITLES = ["Гороскопы на месяц", "Гороскоп на 2014 год", "Гороскоп на 2014 год зеленой Лошади", "Гороскоп на сентябрь 2014"].freeze

    # Children of '#main' that parse_small drops: by CSS class, by tag name,
    # or because their text is nothing but line breaks.
    SKIPPED_CLASSES = ['lp50', 'rp50', 'space'].freeze
    SKIPPED_TAGS = ['img', 'br', 'b', 'h1'].freeze
    BLANK_TEXTS = ["\n", "\n\n"].freeze

    # +source+ is handed to Parser.connect; the returned object is kept as
    # @source (presumably a fetched page that supports #link_with; TODO
    # confirm against Parser.connect).
    def initialize(source)
      @zodiacs = ZODIACS
      @source = Parser.connect(source)

      @titles = Hashie::Mash.new
      @titles.small = SMALL_TITLES
      @titles.big = BIG_TITLES
    end

    # Parses one section.
    #
    # text - text of the link to open on the source page.
    # date - 'small' selects parse_small; any other value selects parse_big.
    #
    # Returns the Array produced by parse_content.
    def parse_in(text = "Гороскоп на сегодня", date = 'small')
      @text = text
      @date = date
      parse_content
    end

    # Fetches every zodiac page of the current section and returns an Array of
    # Hashie::Mash objects responding to #zodiac and #content.
    def parse_content
      zodiac_links.map do |entry|
        @page = Parser.http(entry.link.value)

        horoscope = Hashie::Mash.new
        horoscope.zodiac = entry.zodiac
        horoscope.content = (@date == 'small' ? parse_small : parse_big)
        horoscope
      end
    end

    # Parses every configured section, small titles first.
    #
    # Returns an Array of {title:, content:} hashes, where content is the
    # parse_in result for that title.
    def parse_all
      small_content = @titles.small.map { |title| {title: title, content: parse_in(title, "small")} }
      big_content = @titles.big.map { |title| {title: title, content: parse_in(title, "big")} }

      small_content.concat(big_content)
    end

    private

    # Follows the link whose text equals @text on the source page.
    def current_page
      @source.link_with(:text => @text).click
    end

    # Pairs every zodiac sign with the href attribute of its link on the
    # section page.
    def zodiac_links
      # Follow the section link once and reuse the page for all signs instead
      # of clicking it again for every sign.
      section = current_page

      @zodiacs.map do |sign|
        entry = Hashie::Mash.new
        entry.link = section.search("a:contains('#{sign}')").first.attributes["href"]
        entry.zodiac = sign
        entry
      end
    end

    # Joins the children of '#main' that survive the skipped_node? filter.
    def parse_small
      @page.css('#main').children.reject { |el| skipped_node?(el) }.join
    end

    # True when +el+ carries one of SKIPPED_CLASSES as its class attribute,
    # is one of SKIPPED_TAGS, or has only line breaks as text.
    def skipped_node?(el)
      css_class = el.attributes['class']
      return true if css_class && SKIPPED_CLASSES.include?(css_class.value)

      SKIPPED_TAGS.include?(el.name) || BLANK_TEXTS.include?(el.text)
    end

    # Text of the '.lp50' element(s) inside '#main'.
    def parse_big
      @page.css('#main .lp50').text
    end
  end
end
|