rssdump 0.1.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95a5bf76f09fe0626531a62ffbabc791fd1ca4ac
4
- data.tar.gz: 8ba6115c49373a2281dcb120fa29b334d28abbe9
3
+ metadata.gz: 4a064db5574bf273163d1194ea130f14f9124f48
4
+ data.tar.gz: 3aec73daf1261d65a697fbfdf113d33c22d21a33
5
5
  SHA512:
6
- metadata.gz: 1ffe505a06cf39cc23cd5dc40818163f32f00e75c1dc93d822f3fd797b198c6c96f1b3e06f0e6fb0d4295035a3a13d826ecf42f0844bcc26ca229123d8e1293e
7
- data.tar.gz: 103b7df56f448c4e15c50b923b601716c15d98c54cc755b004723c96d0b8c4a7b8b4ff4d05816f51ea9e7041c61070b628f4df2e95ac31cd4a559c297eee2866
6
+ metadata.gz: 0b27cb123ebd759ff853f35d4ae763a056fc2696750284039c862bea6de4a5632871d822718c98dc666cedca93de069a18678881206dba410ec43e6cd5579bc2
7
+ data.tar.gz: c54ab35de1b51eaad4bd3baf42507f83a52cd7ba68b7af77e9c5eea13f4766c706223afcbafcb392ae0a34f5aef063d2225ae9e41456ddb6de62736a029e4975
data/lib/rssdump/item.rb CHANGED
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Rssdump
3
3
  class Item
4
- attr_accessor :link, :title, :description, :category, :pub_date, :feed, :feed_name
4
+ attr_accessor :link, :title, :description, :category, :pub_date, :feed
5
5
  end
6
6
  end
@@ -7,12 +7,11 @@ module Rssdump
7
7
  include Logging
8
8
  include Cleaning
9
9
 
10
- attr_reader :errors
11
-
12
- def scrap feed, feed_name = "_"
13
- @errors = []
14
- rss = SimpleRSS.parse ensure_valid(open(feed).read)
15
- rss.items.map do |item|
10
+ def scrap feed_url
11
+ rss = SimpleRSS.parse ensure_valid(open(feed_url).read)
12
+ status = :ok
13
+ errors = []
14
+ items = rss.items.map do |item|
16
15
  begin
17
16
  ritem = Item.new
18
17
  ritem.title = clean_html(item.title)
@@ -20,10 +19,11 @@ module Rssdump
20
19
  ritem.description = clean_html(item.description)
21
20
  ritem.pub_date = item.pubDate || item.updated
22
21
  ritem.link = clean_link(item.link)
23
- ritem.feed = feed
24
- ritem.feed_name = feed_name
22
+ ritem.feed = feed_url
25
23
  ritem
26
24
  rescue => e
25
+ status = :ko
26
+ errors << e
27
27
  logger.error "An error occurred during cleaning with item #{item.link}."
28
28
  logger.error "#{e}\n#{e.backtrace.join("\n")}"
29
29
  logger.warn "Ignoring item #{item.link}."
@@ -32,6 +32,11 @@ module Rssdump
32
32
  end.select do |item|
33
33
  !item.nil?
34
34
  end
35
+ {
36
+ status: status,
37
+ errors: errors,
38
+ items: items
39
+ }
35
40
  end
36
41
  end
37
42
  end
@@ -1,3 +1,3 @@
1
1
  module Rssdump
2
- VERSION = "0.1.0"
2
+ VERSION = "1.0.0"
3
3
  end
data/lib/rssdump.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require "logging"
2
2
 
3
3
  require_relative "./rssdump/cleaning"
4
- require_relative "./rssdump/mongo_store"
5
4
  require_relative "./rssdump/item"
6
5
  require_relative "./rssdump/dumper"
7
6
  require_relative "./rssdump/scraper"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rssdump
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Cram
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-04 00:00:00.000000000 Z
11
+ date: 2016-07-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -66,20 +66,6 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: mongo
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '2.1'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '2.1'
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: awesome_print
85
71
  requirement: !ruby/object:Gem::Requirement
@@ -122,8 +108,7 @@ dependencies:
122
108
  - - ">="
123
109
  - !ruby/object:Gem::Version
124
110
  version: '0'
125
- description: Retrieves all items from an RSS feed and stores them to a MongoDB collection.
126
- Rssdump is based on simple-rss.
111
+ description: Retrieves all items from an RSS feed and clean them.
127
112
  email:
128
113
  - damien.cram@laposte.net
129
114
  executables: []
@@ -134,7 +119,6 @@ files:
134
119
  - lib/rssdump/cleaning.rb
135
120
  - lib/rssdump/dumper.rb
136
121
  - lib/rssdump/item.rb
137
- - lib/rssdump/mongo_store.rb
138
122
  - lib/rssdump/scraper.rb
139
123
  - lib/rssdump/tasks.rb
140
124
  - lib/rssdump/version.rb
@@ -160,6 +144,5 @@ rubyforge_project:
160
144
  rubygems_version: 2.5.1
161
145
  signing_key:
162
146
  specification_version: 4
163
- summary: Retrieves all items from an RSS feed and stores them to a MongoDB collection.
164
- Rssdump is based on simple-rss.
147
+ summary: Retrieves all items from an RSS feed and clean them.
165
148
  test_files: []
@@ -1,48 +0,0 @@
1
- require 'mongo'
2
-
3
- module Rssdump
4
- class MongoStore
5
- DEFAULT_URL = "mongodb://127.0.0.1:27017/rssdump"
6
- COLL_ITEMS = "items"
7
-
8
- def initialize url
9
- @url = url || DEFAULT_URL
10
- client[COLL_ITEMS].indexes.create_one({ :link => 1 }, :unique => true)
11
- client[COLL_ITEMS].indexes.create_one({ :pub_date => 1 }, :unique => false)
12
- @logger = Logging.logger[self]
13
- end
14
-
15
- def upsert item
16
- if client[COLL_ITEMS].find({link: item.link}).count == 0
17
- @logger.debug "Inserting new item #{item.link} to store"
18
- client[COLL_ITEMS].insert_one({
19
- v: Rssdump::VERSION,
20
- title: item.title,
21
- link: item.link,
22
- feed: item.feed,
23
- feed_name: item.feed_name,
24
- category: item.category,
25
- description: item.description,
26
- pub_date: item.pub_date,
27
- })
28
- true
29
- else
30
- false
31
- end
32
- end
33
-
34
- def nb_items
35
- client[COLL_ITEMS].count
36
- end
37
-
38
- def disk_usage_mb
39
- client.database.command({dbStats: 1, scale: 1024**2}).first["dataSize"]
40
- end
41
-
42
- private
43
-
44
- def client
45
- @client ||= Mongo::Client.new(@url)
46
- end
47
- end
48
- end