rssdump 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 95a5bf76f09fe0626531a62ffbabc791fd1ca4ac
4
- data.tar.gz: 8ba6115c49373a2281dcb120fa29b334d28abbe9
3
+ metadata.gz: 4a064db5574bf273163d1194ea130f14f9124f48
4
+ data.tar.gz: 3aec73daf1261d65a697fbfdf113d33c22d21a33
5
5
  SHA512:
6
- metadata.gz: 1ffe505a06cf39cc23cd5dc40818163f32f00e75c1dc93d822f3fd797b198c6c96f1b3e06f0e6fb0d4295035a3a13d826ecf42f0844bcc26ca229123d8e1293e
7
- data.tar.gz: 103b7df56f448c4e15c50b923b601716c15d98c54cc755b004723c96d0b8c4a7b8b4ff4d05816f51ea9e7041c61070b628f4df2e95ac31cd4a559c297eee2866
6
+ metadata.gz: 0b27cb123ebd759ff853f35d4ae763a056fc2696750284039c862bea6de4a5632871d822718c98dc666cedca93de069a18678881206dba410ec43e6cd5579bc2
7
+ data.tar.gz: c54ab35de1b51eaad4bd3baf42507f83a52cd7ba68b7af77e9c5eea13f4766c706223afcbafcb392ae0a34f5aef063d2225ae9e41456ddb6de62736a029e4975
data/lib/rssdump/item.rb CHANGED
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Rssdump
3
3
  class Item
4
- attr_accessor :link, :title, :description, :category, :pub_date, :feed, :feed_name
4
+ attr_accessor :link, :title, :description, :category, :pub_date, :feed
5
5
  end
6
6
  end
@@ -7,12 +7,11 @@ module Rssdump
7
7
  include Logging
8
8
  include Cleaning
9
9
 
10
- attr_reader :errors
11
-
12
- def scrap feed, feed_name = "_"
13
- @errors = []
14
- rss = SimpleRSS.parse ensure_valid(open(feed).read)
15
- rss.items.map do |item|
10
+ def scrap feed_url
11
+ rss = SimpleRSS.parse ensure_valid(open(feed_url).read)
12
+ status = :ok
13
+ errors = []
14
+ items = rss.items.map do |item|
16
15
  begin
17
16
  ritem = Item.new
18
17
  ritem.title = clean_html(item.title)
@@ -20,10 +19,11 @@ module Rssdump
20
19
  ritem.description = clean_html(item.description)
21
20
  ritem.pub_date = item.pubDate || item.updated
22
21
  ritem.link = clean_link(item.link)
23
- ritem.feed = feed
24
- ritem.feed_name = feed_name
22
+ ritem.feed = feed_url
25
23
  ritem
26
24
  rescue => e
25
+ status = :ko
26
+ errors << e
27
27
  logger.error "An error occurred during cleaning with item #{item.link}."
28
28
  logger.error "#{e}\n#{e.backtrace.join("\n")}"
29
29
  logger.warn "Ignoring item #{item.link}."
@@ -32,6 +32,11 @@ module Rssdump
32
32
  end.select do |item|
33
33
  !item.nil?
34
34
  end
35
+ {
36
+ status: status,
37
+ errors: errors,
38
+ items: items
39
+ }
35
40
  end
36
41
  end
37
42
  end
@@ -1,3 +1,3 @@
1
1
  module Rssdump
2
- VERSION = "0.1.0"
2
+ VERSION = "1.0.0"
3
3
  end
data/lib/rssdump.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require "logging"
2
2
 
3
3
  require_relative "./rssdump/cleaning"
4
- require_relative "./rssdump/mongo_store"
5
4
  require_relative "./rssdump/item"
6
5
  require_relative "./rssdump/dumper"
7
6
  require_relative "./rssdump/scraper"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rssdump
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Cram
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-04 00:00:00.000000000 Z
11
+ date: 2016-07-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -66,20 +66,6 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
- - !ruby/object:Gem::Dependency
70
- name: mongo
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '2.1'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '2.1'
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: awesome_print
85
71
  requirement: !ruby/object:Gem::Requirement
@@ -122,8 +108,7 @@ dependencies:
122
108
  - - ">="
123
109
  - !ruby/object:Gem::Version
124
110
  version: '0'
125
- description: Retrieves all items from an RSS feed and stores them to a MongoDB collection.
126
- Rssdump is based on simple-rss.
111
+ description: Retrieves all items from an RSS feed and clean them.
127
112
  email:
128
113
  - damien.cram@laposte.net
129
114
  executables: []
@@ -134,7 +119,6 @@ files:
134
119
  - lib/rssdump/cleaning.rb
135
120
  - lib/rssdump/dumper.rb
136
121
  - lib/rssdump/item.rb
137
- - lib/rssdump/mongo_store.rb
138
122
  - lib/rssdump/scraper.rb
139
123
  - lib/rssdump/tasks.rb
140
124
  - lib/rssdump/version.rb
@@ -160,6 +144,5 @@ rubyforge_project:
160
144
  rubygems_version: 2.5.1
161
145
  signing_key:
162
146
  specification_version: 4
163
- summary: Retrieves all items from an RSS feed and stores them to a MongoDB collection.
164
- Rssdump is based on simple-rss.
147
+ summary: Retrieves all items from an RSS feed and clean them.
165
148
  test_files: []
@@ -1,48 +0,0 @@
1
- require 'mongo'
2
-
3
- module Rssdump
4
- class MongoStore
5
- DEFAULT_URL = "mongodb://127.0.0.1:27017/rssdump"
6
- COLL_ITEMS = "items"
7
-
8
- def initialize url
9
- @url = url || DEFAULT_URL
10
- client[COLL_ITEMS].indexes.create_one({ :link => 1 }, :unique => true)
11
- client[COLL_ITEMS].indexes.create_one({ :pub_date => 1 }, :unique => false)
12
- @logger = Logging.logger[self]
13
- end
14
-
15
- def upsert item
16
- if client[COLL_ITEMS].find({link: item.link}).count == 0
17
- @logger.debug "Inserting new item #{item.link} to store"
18
- client[COLL_ITEMS].insert_one({
19
- v: Rssdump::VERSION,
20
- title: item.title,
21
- link: item.link,
22
- feed: item.feed,
23
- feed_name: item.feed_name,
24
- category: item.category,
25
- description: item.description,
26
- pub_date: item.pub_date,
27
- })
28
- true
29
- else
30
- false
31
- end
32
- end
33
-
34
- def nb_items
35
- client[COLL_ITEMS].count
36
- end
37
-
38
- def disk_usage_mb
39
- client.database.command({dbStats: 1, scale: 1024**2}).first["dataSize"]
40
- end
41
-
42
- private
43
-
44
- def client
45
- @client ||= Mongo::Client.new(@url)
46
- end
47
- end
48
- end