fly_parser 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a4b60890098babea5cdf0a19b2ab7f72207bce93
4
- data.tar.gz: a5cf7fdba982c30f622f2c048ea5ef08d6a4565a
3
+ metadata.gz: 04dc5a5ee6aefcaad08a66b4d86d418d2e2d6621
4
+ data.tar.gz: 8145b7beb20e9b7047c2422e3172a14b25218267
5
5
  SHA512:
6
- metadata.gz: 4d9bfcfbc33a83acbf053cca55206f27377a1f6ab62ead285def08fc43ef659ea61aee533c5c743b21ee06b8d7c22e79b77942370ce76ace69270e14f359f5fd
7
- data.tar.gz: f4b199bf649c121cbf1883e00acf451742dd25830076bc3b55a62258e149d1c314026e447580a3c8e2eab3d42b6648e5b9adeb763056aa3bec10322c62d6c288
6
+ metadata.gz: 4283c77559952bdcc2ed404e9b629c0263b806d9d0a776fed4d198674b6e92fb5601f65942fb9b29297fc7e2a42599ceba6740f2fcedcbce2bda0ec1432f9e0b
7
+ data.tar.gz: 37b9f632c4a943defa36e6d5d6a666ed3d3735379d8e0221b10afb881e64d4df096b92f9fac75d215a7cfe7cf833b502713feba5c298212385fe6daf2a21eed4
@@ -0,0 +1,41 @@
1
+ # To add a new parser source, define a method here named after the source; it is invoked as enable_<source> via method_missing
2
+ module Enable
3
+ def fitness(source)
4
+ lambda do |item|
5
+ item["parser"] = Parser::Exercise.new(item["url"], source: source) and next if item["type"] == "exercises"
6
+ item["parser"] = Parser::Fitness.new(item["url"], source: source)
7
+ end
8
+ end
9
+
10
+ def news(source)
11
+ lambda { |item| item["parser"] = Parser::News.new(item["url"], source: source) }
12
+ end
13
+
14
+ def local(source)
15
+ lambda { |item| item["parser"] = Parser::News.new(item["file"], {type: :file, source: source}) }
16
+ end
17
+
18
+ def news_nl(source)
19
+ lambda { |item| item["parser"] = Parser::NewsNl.new(item["url"], source: source) }
20
+ end
21
+
22
+ def news_fr(source)
23
+ lambda {|item| item["parser"] = Parser::NewsFr.new(item["url"], source: source)}
24
+ end
25
+
26
+ def method_missing(meth, *args)
27
+ prefix = "enable_"
28
+ meth = meth.to_s
29
+ if meth.start_with?(prefix)
30
+ meth_name = meth.split(prefix).last
31
+ proc = send(meth_name, *args)
32
+ iterate_sources(*args, proc)
33
+ else
34
+ raise "Unknown method #{meth} in Enable class, ssory !"
35
+ end
36
+ end
37
+
38
+ def iterate_sources(source, block)
39
+ source["items"].each(&block)
40
+ end
41
+ end
@@ -0,0 +1,39 @@
1
+ module Parser
2
+ class NewsFr < XmlBase
3
+ def initialize(source, options = {})
4
+ super
5
+ end
6
+
7
+ def parse_all
8
+ items = @source.search('//item')
9
+ # # last_date = Time.now - 2.years # for dev 2 years
10
+ # # select! and reject! do not exist on Nokogiri's NodeSet
11
+ # # items = items.select {|item| item.xpath('pubDate').first.content() > last_date }
12
+ items.map do |item|
13
+ title = item.xpath('title/text()').text()
14
+ date = item.xpath('pubdate').first.content()
15
+
16
+ link = item.xpath('link/following-sibling::text()[1]').first
17
+ page = Nokogiri::HTML(open(link))
18
+
19
+ next if page.search('figure.img img').first.nil?
20
+
21
+ poster_image = page.search('.article-long figure.img img').first.attributes['src'].value
22
+ full_desc = page.search('.article-long .bd')
23
+ full_desc.search('.modification').remove()
24
+ full_desc.search('script').remove()
25
+ full_desc.search('.ft').remove()
26
+ full_desc.search('a').remove_attr('href')
27
+ full_desc.search('.twitter-tweet').remove()
28
+
29
+ desc = full_desc.inner_html
30
+ desc.gsub! /h2|h1|h3/, 'h4'
31
+ # remove href attributes
32
+ #full_desc = full_desc.text().gsub(/<a href="([a-zA-Z:\/\.\d\-]*)">(.*)<\/a>/,'<a>\2</a>')
33
+ copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
34
+ content = desc + copyright
35
+ {title: title, content: content, poster_image: poster_image}
36
+ end.compact
37
+ end
38
+ end
39
+ end
data/lib/fly_parser.rb CHANGED
@@ -34,6 +34,7 @@ end
34
34
 
35
35
  module Parser
36
36
  class << self
37
+ include Enable if defined? Enable
37
38
  # Get HTTP Source
38
39
  def http(url)
39
40
  Nokogiri::HTML(open(url))
@@ -41,6 +42,7 @@ module Parser
41
42
 
42
43
  def connect(url)
43
44
  agent = Mechanize.new
45
+ agent.pluggable_parser.default = Mechanize::Page
44
46
  agent.get(url)
45
47
  end
46
48
 
@@ -93,29 +95,10 @@ module Parser
93
95
  File.read(LOGO_PATH)
94
96
  end
95
97
 
96
- # choose parser for source here
97
98
  def init_parser(source)
98
- case source["source"]
99
- when "fitness"
100
- source["items"].each do |item|
101
- item["parser"] = Parser::Exercise.new(item["url"], source: source) and next if item["type"] == "exercises"
102
- item["parser"] = Parser::Fitness.new(item["url"], source: source)
103
- end
104
- when "news"
105
- source["items"].each do |item|
106
- item["parser"] = Parser::News.new(item["url"], source: source)
107
- end
108
- when "local"
109
- if source["enabled"]
110
- source["items"].each do |item|
111
- item["parser"] = Parser::News.new(item["file"], {type: :file, source: source})
112
- end
113
- end
114
- when "news-nl"
115
- source["items"].each do |item|
116
- item["parser"] = Parser::NewsNl.new(item["url"], source: source)
117
- end
118
- end
99
+ source_type = source["source"].gsub('-', '_')
100
+ prefix = "enable_"
101
+ send(prefix + source_type, source)
119
102
  end
120
103
 
121
104
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fly_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ruslan Korolev
@@ -118,11 +118,13 @@ files:
118
118
  - lib/fly_parser.rb
119
119
  - lib/fly_parser/base.rb
120
120
  - lib/fly_parser/config_example.yml
121
+ - lib/fly_parser/enable_source.rb
121
122
  - lib/fly_parser/logo.txt
122
123
  - lib/fly_parser/mechanize_fix.rb
123
124
  - lib/fly_parser/sources/astrology.rb
124
125
  - lib/fly_parser/sources/exercise.rb
125
126
  - lib/fly_parser/sources/fitness.rb
127
+ - lib/fly_parser/sources/news-fr.rb
126
128
  - lib/fly_parser/sources/news-nl.rb
127
129
  - lib/fly_parser/sources/news.rb
128
130
  - lib/fly_parser/sources/sport.rb