pluto 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +2 -0
- data/lib/pluto.rb +24 -1
- data/lib/pluto/cli/opts.rb +1 -1
- data/lib/pluto/cli/runner.rb +4 -16
- data/lib/pluto/connecter.rb +70 -0
- data/lib/pluto/fetcher.rb +6 -163
- data/lib/pluto/models.rb +18 -1
- data/lib/pluto/schema.rb +12 -0
- data/lib/pluto/server.rb +15 -5
- data/lib/pluto/updater.rb +224 -0
- data/lib/pluto/version.rb +1 -1
- metadata +13 -11
data/Manifest.txt
CHANGED
@@ -6,6 +6,7 @@ bin/pluto
|
|
6
6
|
lib/pluto.rb
|
7
7
|
lib/pluto/cli/opts.rb
|
8
8
|
lib/pluto/cli/runner.rb
|
9
|
+
lib/pluto/connecter.rb
|
9
10
|
lib/pluto/fetcher.rb
|
10
11
|
lib/pluto/formatter.rb
|
11
12
|
lib/pluto/models.rb
|
@@ -17,6 +18,7 @@ lib/pluto/server/views/_version.erb
|
|
17
18
|
lib/pluto/server/views/debug.erb
|
18
19
|
lib/pluto/server/views/index.erb
|
19
20
|
lib/pluto/server/views/layout.erb
|
21
|
+
lib/pluto/updater.rb
|
20
22
|
lib/pluto/version.rb
|
21
23
|
templates/blank.html.erb
|
22
24
|
templates/blank.top.html.erb
|
data/lib/pluto.rb
CHANGED
@@ -6,11 +6,15 @@
|
|
6
6
|
# core and stdlibs
|
7
7
|
|
8
8
|
require 'yaml'
|
9
|
+
require 'json'
|
10
|
+
require 'uri'
|
9
11
|
require 'pp'
|
10
12
|
require 'logger'
|
11
13
|
require 'optparse'
|
12
14
|
require 'fileutils'
|
13
15
|
|
16
|
+
|
17
|
+
|
14
18
|
require 'rss'
|
15
19
|
|
16
20
|
# rubygems
|
@@ -28,6 +32,8 @@ require 'pakman' # template pack manager
|
|
28
32
|
require 'pluto/version' # let version always get first
|
29
33
|
require 'pluto/schema'
|
30
34
|
require 'pluto/models'
|
35
|
+
require 'pluto/connecter'
|
36
|
+
require 'pluto/updater'
|
31
37
|
require 'pluto/fetcher'
|
32
38
|
require 'pluto/formatter'
|
33
39
|
|
@@ -44,6 +50,19 @@ module Pluto
|
|
44
50
|
"#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}"
|
45
51
|
end
|
46
52
|
|
53
|
+
def self.connect!( config=nil ) # convenience shortcut
|
54
|
+
Connecter.new.connect!( config )
|
55
|
+
end
|
56
|
+
|
57
|
+
def self.update_subscriptions( config )
|
58
|
+
Updater.new.update_subscriptions( config )
|
59
|
+
end
|
60
|
+
|
61
|
+
def self.update_feeds
|
62
|
+
Updater.new.update_feeds
|
63
|
+
end
|
64
|
+
|
65
|
+
|
47
66
|
def self.main
|
48
67
|
Runner.new.run(ARGV)
|
49
68
|
end
|
@@ -51,4 +70,8 @@ module Pluto
|
|
51
70
|
end # module Pluto
|
52
71
|
|
53
72
|
|
54
|
-
|
73
|
+
if __FILE__ == $0
|
74
|
+
Pluto.main
|
75
|
+
else
|
76
|
+
puts Pluto.banner # say hello
|
77
|
+
end
|
data/lib/pluto/cli/opts.rb
CHANGED
data/lib/pluto/cli/runner.rb
CHANGED
@@ -5,7 +5,7 @@ class Runner
|
|
5
5
|
|
6
6
|
include LogUtils::Logging
|
7
7
|
|
8
|
-
include Models # e.g. Feed,Item,etc.
|
8
|
+
include Models # e.g. Feed,Item,Site,etc.
|
9
9
|
|
10
10
|
def initialize
|
11
11
|
@opts = Opts.new
|
@@ -37,6 +37,7 @@ class Runner
|
|
37
37
|
|
38
38
|
cmd.on( "--verbose", "Show debug trace" ) do
|
39
39
|
LogUtils::Logger.root.level = :debug
|
40
|
+
opts.verbose = true
|
40
41
|
end
|
41
42
|
|
42
43
|
## todo: add/allow -? too
|
@@ -68,10 +69,10 @@ EOS
|
|
68
69
|
|
69
70
|
db_config = {
|
70
71
|
adapter: 'sqlite3',
|
71
|
-
database: "#{opts.output_path}/#{name}.
|
72
|
+
database: "#{opts.output_path}/#{name}.db"
|
72
73
|
}
|
73
74
|
|
74
|
-
|
75
|
+
Connecter.new.connect!( db_config )
|
75
76
|
|
76
77
|
config_path = arg.dup # add .yml file extension if missing (for convenience)
|
77
78
|
config_path << '.yml' unless config_path.ends_with?( '.yml' )
|
@@ -91,18 +92,5 @@ EOS
|
|
91
92
|
end # method run
|
92
93
|
|
93
94
|
|
94
|
-
private
|
95
|
-
|
96
|
-
def setup_db( db_config )
|
97
|
-
puts 'db settings:'
|
98
|
-
pp db_config
|
99
|
-
|
100
|
-
ActiveRecord::Base.establish_connection( db_config )
|
101
|
-
|
102
|
-
unless Feed.table_exists?
|
103
|
-
CreateDb.new.up # run db migratation, that is, create db tables
|
104
|
-
end
|
105
|
-
end # method setup_db
|
106
|
-
|
107
95
|
end # class Runner
|
108
96
|
end # module Pluto
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Pluto
|
2
|
+
|
3
|
+
|
4
|
+
# DB Connecter / Connection Manager
|
5
|
+
# lets you establish connection
|
6
|
+
|
7
|
+
class Connecter
|
8
|
+
|
9
|
+
include LogUtils::Logging
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
# do nothing for now
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
def debug=(value)
|
17
|
+
@debug = value
|
18
|
+
end
|
19
|
+
|
20
|
+
def debug?
|
21
|
+
@debug || false
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
def connect!( config = nil )
|
26
|
+
|
27
|
+
if config.nil? # use DATABASE_URL
|
28
|
+
|
29
|
+
logger.debug "ENV['DATBASE_URL'] - >#{ENV['DATABASE_URL']}<"
|
30
|
+
|
31
|
+
db = URI.parse( ENV['DATABASE_URL'] || 'sqlite3:///pluto.db' )
|
32
|
+
|
33
|
+
if db.scheme == 'postgres'
|
34
|
+
config = {
|
35
|
+
adapter: 'postgresql',
|
36
|
+
host: db.host,
|
37
|
+
port: db.port,
|
38
|
+
username: db.user,
|
39
|
+
password: db.password,
|
40
|
+
database: db.path[1..-1],
|
41
|
+
encoding: 'utf8'
|
42
|
+
}
|
43
|
+
else
|
44
|
+
config = {
|
45
|
+
adapter: db.scheme, # sqlite3
|
46
|
+
database: db.path[1..-1] # pluto.db (NB: cut off leading /, thus 1..-1)
|
47
|
+
}
|
48
|
+
end
|
49
|
+
end # if config.nil?
|
50
|
+
|
51
|
+
puts 'db settings:'
|
52
|
+
pp config
|
53
|
+
|
54
|
+
# for debugging - disable for production use
|
55
|
+
if debug?
|
56
|
+
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
57
|
+
end
|
58
|
+
|
59
|
+
ActiveRecord::Base.establish_connection( config )
|
60
|
+
|
61
|
+
# first time? - auto-run db migration, that is, create db tables
|
62
|
+
unless Models::Feed.table_exists?
|
63
|
+
CreateDb.new.up
|
64
|
+
end
|
65
|
+
end # method connect!
|
66
|
+
|
67
|
+
|
68
|
+
end # class Connecter
|
69
|
+
|
70
|
+
end # module Pluto
|
data/lib/pluto/fetcher.rb
CHANGED
@@ -5,184 +5,27 @@ class Fetcher
|
|
5
5
|
|
6
6
|
include LogUtils::Logging
|
7
7
|
|
8
|
-
include Models
|
9
|
-
|
10
8
|
def initialize( opts, config )
|
11
9
|
@opts = opts
|
12
10
|
@config = config
|
13
|
-
@worker = ::Fetcher::Worker.new
|
14
11
|
end
|
15
12
|
|
16
|
-
attr_reader :opts, :config
|
17
|
-
|
18
|
-
|
19
|
-
def fetch_feed( url )
|
20
|
-
xml = worker.read( url )
|
21
|
-
|
22
|
-
###
|
23
|
-
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
24
|
-
# will mostly be ASCII
|
25
|
-
# - try to change encoding to UTF-8 ourselves
|
26
|
-
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
27
|
-
|
28
|
-
#####
|
29
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
30
|
-
|
31
|
-
## NB:
|
32
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
33
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
34
|
-
xml = xml.force_encoding( Encoding::UTF_8 )
|
35
|
-
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
36
|
-
xml
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
def parse_feed( xml )
|
41
|
-
parser = RSS::Parser.new( xml )
|
42
|
-
parser.do_validate = false
|
43
|
-
parser.ignore_unknown_element = true
|
44
|
-
|
45
|
-
puts "Parsing feed..."
|
46
|
-
feed = parser.parse
|
47
|
-
|
48
|
-
puts " feed.class=#{feed.class.name}"
|
49
|
-
feed
|
50
|
-
end
|
13
|
+
attr_reader :opts, :config
|
51
14
|
|
52
15
|
|
53
16
|
def run
|
54
|
-
logger.debug "RSS::VERSION #{RSS::VERSION}"
|
55
|
-
|
56
|
-
config[ 'feeds' ].each do |feed_key|
|
57
|
-
|
58
|
-
feed_hash = config[ feed_key ]
|
59
|
-
feed_url = feed_hash[ 'feed_url' ]
|
60
|
-
|
61
|
-
puts "Fetching feed >#{feed_key}< using >#{feed_url}<..."
|
62
|
-
|
63
|
-
feed_rec = Feed.find_by_key( feed_key )
|
64
|
-
if feed_rec.nil?
|
65
|
-
feed_rec = Feed.new
|
66
|
-
feed_rec.key = feed_key
|
67
|
-
end
|
68
|
-
feed_rec.feed_url = feed_url
|
69
|
-
feed_rec.url = feed_hash[ 'url' ]
|
70
|
-
feed_rec.title = feed_hash[ 'title' ] # todo: use title from feed?
|
71
|
-
feed_rec.save!
|
72
|
-
|
73
|
-
feed_xml = fetch_feed( feed_url )
|
74
|
-
|
75
|
-
# if opts.verbose? # also write a copy to disk
|
76
|
-
# ## fix: use just File.write instead of fetching again
|
77
|
-
# worker.copy( feed_url, "./#{feed_key}.xml" )
|
78
|
-
# end
|
79
|
-
|
80
|
-
# xml = File.read( "./#{feed_key}.xml" )
|
81
|
-
|
82
|
-
puts "Before parsing feed >#{feed_key}<..."
|
83
17
|
|
84
|
-
|
18
|
+
updater = Updater.new
|
85
19
|
|
86
|
-
|
87
|
-
|
88
|
-
else ## assume RSS::Rss::Feed
|
89
|
-
puts "== #{feed.channel.title} =="
|
90
|
-
end
|
20
|
+
# pass along debug/verbose setting/switch
|
21
|
+
updater.debug = true if opts.verbose?
|
91
22
|
|
92
|
-
|
93
|
-
|
94
|
-
item_attribs = handle_feed_item_atom( item )
|
95
|
-
else ## assume RSS::Rss::Feed
|
96
|
-
item_attribs = handle_feed_item_rss( item )
|
97
|
-
end
|
98
|
-
|
99
|
-
# add feed_id fk_ref
|
100
|
-
item_attribs[ :feed_id ] = feed_rec.id
|
101
|
-
|
102
|
-
rec = Item.find_by_guid( item_attribs[ :guid ] )
|
103
|
-
if rec.nil?
|
104
|
-
rec = Item.new
|
105
|
-
puts "** NEW"
|
106
|
-
else
|
107
|
-
puts "UPDATE"
|
108
|
-
end
|
109
|
-
|
110
|
-
rec.update_attributes!( item_attribs )
|
111
|
-
end # each item
|
112
|
-
|
113
|
-
end # each feed
|
23
|
+
updater.update_subscriptions( config )
|
24
|
+
updater.update_feeds
|
114
25
|
|
115
26
|
end # method run
|
116
27
|
|
117
28
|
|
118
|
-
def handle_feed_item_atom( item )
|
119
|
-
|
120
|
-
## todo: if content.content empty use summary for example
|
121
|
-
item_attribs = {
|
122
|
-
title: item.title.content,
|
123
|
-
url: item.link.href,
|
124
|
-
published_at: item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" ),
|
125
|
-
# content: item.content.content,
|
126
|
-
}
|
127
|
-
|
128
|
-
item_attribs[ :guid ] = item.id.content
|
129
|
-
|
130
|
-
if item.summary
|
131
|
-
item_attribs[ :content ] = item.summary.content
|
132
|
-
else
|
133
|
-
if item.content
|
134
|
-
text = item.content.content.dup
|
135
|
-
## strip all html tags
|
136
|
-
text = text.gsub( /<[^>]+>/, '' )
|
137
|
-
text = text[ 0..400 ] # get first 400 chars
|
138
|
-
## todo: check for length if > 400 add ... at the end???
|
139
|
-
item_attribs[ :content ] = text
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
puts "- #{item.title.content}"
|
144
|
-
puts " link >#{item.link.href}<"
|
145
|
-
puts " id (~guid) >#{item.id.content}<"
|
146
|
-
|
147
|
-
### todo: use/try published first? why? why not?
|
148
|
-
puts " updated (~pubDate) >#{item.updated.content}< >#{item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{item.updated.content.class.name}"
|
149
|
-
puts
|
150
|
-
|
151
|
-
# puts "*** dump item:"
|
152
|
-
# pp item
|
153
|
-
|
154
|
-
item_attribs
|
155
|
-
end
|
156
|
-
|
157
|
-
def handle_feed_item_rss( item )
|
158
|
-
|
159
|
-
item_attribs = {
|
160
|
-
title: item.title,
|
161
|
-
url: item.link,
|
162
|
-
published_at: item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" ),
|
163
|
-
# content: item.content_encoded,
|
164
|
-
}
|
165
|
-
|
166
|
-
# if item.content_encoded.nil?
|
167
|
-
# puts " using description for content"
|
168
|
-
item_attribs[ :content ] = item.description
|
169
|
-
# end
|
170
|
-
|
171
|
-
item_attribs[ :guid ] = item.guid.content
|
172
|
-
|
173
|
-
puts "- #{item.title}"
|
174
|
-
puts " link (#{item.link})"
|
175
|
-
puts " guid (#{item.guid.content})"
|
176
|
-
puts " pubDate >#{item.pubDate}< >#{item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{item.pubDate.class.name}"
|
177
|
-
puts
|
178
|
-
|
179
|
-
# puts "*** dump item:"
|
180
|
-
# pp item
|
181
|
-
|
182
|
-
item_attribs
|
183
|
-
end
|
184
|
-
|
185
|
-
|
186
29
|
end # class Fetcher
|
187
30
|
|
188
31
|
end # module Pluto
|
data/lib/pluto/models.rb
CHANGED
@@ -5,6 +5,8 @@ class Feed < ActiveRecord::Base
|
|
5
5
|
self.table_name = 'feeds'
|
6
6
|
|
7
7
|
has_many :items
|
8
|
+
has_many :subscriptions
|
9
|
+
has_many :sites, :through => :subscriptions
|
8
10
|
end
|
9
11
|
|
10
12
|
class Item < ActiveRecord::Base
|
@@ -13,9 +15,24 @@ class Item < ActiveRecord::Base
|
|
13
15
|
belongs_to :feed
|
14
16
|
|
15
17
|
def self.latest
|
16
|
-
|
18
|
+
order( 'published_at desc' )
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
22
|
+
class Site < ActiveRecord::Base
|
23
|
+
self.table_name = 'sites'
|
24
|
+
|
25
|
+
has_many :subscriptions
|
26
|
+
has_many :feeds, :through => :subscriptions
|
27
|
+
end
|
28
|
+
|
29
|
+
class Subscription < ActiveRecord::Base
|
30
|
+
self.table_name = 'subscriptions'
|
31
|
+
|
32
|
+
belongs_to :site
|
33
|
+
belongs_to :feed
|
34
|
+
end
|
35
|
+
|
36
|
+
|
20
37
|
end # module Models
|
21
38
|
end # module Pluto
|
data/lib/pluto/schema.rb
CHANGED
@@ -4,6 +4,18 @@ module Pluto
|
|
4
4
|
class CreateDb < ActiveRecord::Migration
|
5
5
|
|
6
6
|
def up
|
7
|
+
create_table :sites do |t|
|
8
|
+
t.string :title, :null => false # e.g Planet Ruby, Planet JavaScript, etc.
|
9
|
+
t.string :key, :null => false # e.g. ruby, js, etc.
|
10
|
+
t.timestamps
|
11
|
+
end
|
12
|
+
|
13
|
+
create_table :subscriptions do |t| # has_many join table (sites/feeds)
|
14
|
+
t.references :site, :null => false
|
15
|
+
t.references :feed, :null => false
|
16
|
+
t.timestamps
|
17
|
+
end
|
18
|
+
|
7
19
|
create_table :feeds do |t|
|
8
20
|
t.string :title, :null => false
|
9
21
|
t.string :url, :null => false
|
data/lib/pluto/server.rb
CHANGED
@@ -31,19 +31,29 @@ class Server < Sinatra::Base
|
|
31
31
|
set :static, true # set up static file routing
|
32
32
|
|
33
33
|
|
34
|
-
set :site_config, {} # empty hash by default; use site_config
|
35
|
-
|
36
34
|
#######################
|
37
35
|
# Models
|
38
36
|
|
39
|
-
include Models # e.g. Feed, Item, etc.
|
40
|
-
|
37
|
+
include Models # e.g. Feed, Item, Site, etc.
|
38
|
+
|
39
|
+
#################
|
40
|
+
# Utilities
|
41
|
+
|
42
|
+
def site_config_hash
|
43
|
+
h = {}
|
44
|
+
site = Site.first # FIX: for now assume one planet per DB (fix later; allow planet key or similar)
|
45
|
+
if site.present?
|
46
|
+
h['title'] = site.title
|
47
|
+
else
|
48
|
+
h['title'] = 'Planet Untitled'
|
49
|
+
end
|
50
|
+
end
|
41
51
|
|
42
52
|
##############################################
|
43
53
|
# Controllers / Routing / Request Handlers
|
44
54
|
|
45
55
|
get '/' do
|
46
|
-
erb :index, locals: { site:
|
56
|
+
erb :index, locals: { site: site_config_hash }
|
47
57
|
end
|
48
58
|
|
49
59
|
# todo/fix: make a generic route for erb w /regex
|
@@ -0,0 +1,224 @@
|
|
1
|
+
module Pluto
|
2
|
+
|
3
|
+
class Updater
|
4
|
+
|
5
|
+
include LogUtils::Logging
|
6
|
+
|
7
|
+
include Models
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
@worker = ::Fetcher::Worker.new
|
11
|
+
end
|
12
|
+
|
13
|
+
attr_reader :worker
|
14
|
+
|
15
|
+
def debug=(value)
|
16
|
+
@debug = value
|
17
|
+
### logger.debug "[Updater] setting debug flag - debug? #{debug?}"
|
18
|
+
end
|
19
|
+
|
20
|
+
def debug?
|
21
|
+
@debug || false
|
22
|
+
end
|
23
|
+
|
24
|
+
def fetch_feed( url )
|
25
|
+
xml = worker.read( url )
|
26
|
+
|
27
|
+
###
|
28
|
+
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
29
|
+
# will mostly be ASCII
|
30
|
+
# - try to change encoding to UTF-8 ourselves
|
31
|
+
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
32
|
+
|
33
|
+
#####
|
34
|
+
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
35
|
+
|
36
|
+
## NB:
|
37
|
+
# for now "hardcoded" to utf8 - what else can we do?
|
38
|
+
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
39
|
+
xml = xml.force_encoding( Encoding::UTF_8 )
|
40
|
+
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
41
|
+
xml
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
def parse_feed( xml )
|
46
|
+
parser = RSS::Parser.new( xml )
|
47
|
+
parser.do_validate = false
|
48
|
+
parser.ignore_unknown_element = true
|
49
|
+
|
50
|
+
puts "Parsing feed..."
|
51
|
+
feed = parser.parse
|
52
|
+
|
53
|
+
puts " feed.class=#{feed.class.name}"
|
54
|
+
feed
|
55
|
+
end
|
56
|
+
|
57
|
+
|
58
|
+
def update_subscriptions( config, opts={} )
|
59
|
+
|
60
|
+
## for now - use single site w/ key planet -- fix!! allow multiple sites (planets)
|
61
|
+
|
62
|
+
site_key = 'planet'
|
63
|
+
site_rec = Site.find_by_key( site_key )
|
64
|
+
if site_rec.nil?
|
65
|
+
site_rec = Site.new
|
66
|
+
site_rec.key = site_key
|
67
|
+
end
|
68
|
+
site_rec.title = config[ 'title' ]
|
69
|
+
site_rec.save!
|
70
|
+
|
71
|
+
|
72
|
+
config[ 'feeds' ].each do |feed_key|
|
73
|
+
|
74
|
+
feed_hash = config[ feed_key ]
|
75
|
+
feed_url = feed_hash[ 'feed_url' ]
|
76
|
+
|
77
|
+
puts "Updating feed subscription >#{feed_key}< - >#{feed_url}<..."
|
78
|
+
|
79
|
+
feed_rec = Feed.find_by_key( feed_key )
|
80
|
+
if feed_rec.nil?
|
81
|
+
feed_rec = Feed.new
|
82
|
+
feed_rec.key = feed_key
|
83
|
+
end
|
84
|
+
feed_rec.feed_url = feed_url
|
85
|
+
feed_rec.url = feed_hash[ 'url' ]
|
86
|
+
feed_rec.title = feed_hash[ 'title' ] # todo: use title from feed?
|
87
|
+
feed_rec.save!
|
88
|
+
|
89
|
+
## todo:
|
90
|
+
# add subscription records (feed,site) - how?
|
91
|
+
end
|
92
|
+
|
93
|
+
end # method update_subscriptions
|
94
|
+
|
95
|
+
|
96
|
+
def update_feeds( opts={} )
|
97
|
+
|
98
|
+
logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
99
|
+
|
100
|
+
Feed.all.each do |feed_rec|
|
101
|
+
|
102
|
+
feed_key = feed_rec.key
|
103
|
+
feed_url = feed_rec.feed_url
|
104
|
+
|
105
|
+
feed_xml = fetch_feed( feed_url )
|
106
|
+
|
107
|
+
logger.debug "feed_xml:"
|
108
|
+
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
109
|
+
|
110
|
+
# if opts.verbose? # also write a copy to disk
|
111
|
+
if debug?
|
112
|
+
logger.debug "saving feed to >./#{feed_key}.xml<..."
|
113
|
+
File.open( "./#{feed_key}.xml", 'w' ) do |f|
|
114
|
+
f.write( feed_xml )
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
puts "Before parsing feed >#{feed_key}<..."
|
119
|
+
|
120
|
+
feed = parse_feed( feed_xml )
|
121
|
+
|
122
|
+
if feed.class == RSS::Atom::Feed
|
123
|
+
puts "== #{feed.title.content} =="
|
124
|
+
else ## assume RSS::Rss::Feed
|
125
|
+
puts "== #{feed.channel.title} =="
|
126
|
+
end
|
127
|
+
|
128
|
+
feed.items.each do |item|
|
129
|
+
if feed.class == RSS::Atom::Feed
|
130
|
+
item_attribs = handle_feed_item_atom( item )
|
131
|
+
else ## assume RSS::Rss::Feed
|
132
|
+
item_attribs = handle_feed_item_rss( item )
|
133
|
+
end
|
134
|
+
|
135
|
+
# add feed_id fk_ref
|
136
|
+
item_attribs[ :feed_id ] = feed_rec.id
|
137
|
+
|
138
|
+
rec = Item.find_by_guid( item_attribs[ :guid ] )
|
139
|
+
if rec.nil?
|
140
|
+
rec = Item.new
|
141
|
+
puts "** NEW"
|
142
|
+
else
|
143
|
+
puts "UPDATE"
|
144
|
+
end
|
145
|
+
|
146
|
+
rec.update_attributes!( item_attribs )
|
147
|
+
end # each item
|
148
|
+
|
149
|
+
end # each feed
|
150
|
+
|
151
|
+
end # method update_feeds
|
152
|
+
|
153
|
+
|
154
|
+
def handle_feed_item_atom( item )
|
155
|
+
|
156
|
+
## todo: if content.content empty use summary for example
|
157
|
+
item_attribs = {
|
158
|
+
title: item.title.content,
|
159
|
+
url: item.link.href,
|
160
|
+
published_at: item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" ),
|
161
|
+
# content: item.content.content,
|
162
|
+
}
|
163
|
+
|
164
|
+
item_attribs[ :guid ] = item.id.content
|
165
|
+
|
166
|
+
if item.summary
|
167
|
+
item_attribs[ :content ] = item.summary.content
|
168
|
+
else
|
169
|
+
if item.content
|
170
|
+
text = item.content.content.dup
|
171
|
+
## strip all html tags
|
172
|
+
text = text.gsub( /<[^>]+>/, '' )
|
173
|
+
text = text[ 0..400 ] # get first 400 chars
|
174
|
+
## todo: check for length if > 400 add ... at the end???
|
175
|
+
item_attribs[ :content ] = text
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
puts "- #{item.title.content}"
|
180
|
+
puts " link >#{item.link.href}<"
|
181
|
+
puts " id (~guid) >#{item.id.content}<"
|
182
|
+
|
183
|
+
### todo: use/try published first? why? why not?
|
184
|
+
puts " updated (~pubDate) >#{item.updated.content}< >#{item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{item.updated.content.class.name}"
|
185
|
+
puts
|
186
|
+
|
187
|
+
# puts "*** dump item:"
|
188
|
+
# pp item
|
189
|
+
|
190
|
+
item_attribs
|
191
|
+
end
|
192
|
+
|
193
|
+
def handle_feed_item_rss( item )
|
194
|
+
|
195
|
+
item_attribs = {
|
196
|
+
title: item.title,
|
197
|
+
url: item.link,
|
198
|
+
published_at: item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" ),
|
199
|
+
# content: item.content_encoded,
|
200
|
+
}
|
201
|
+
|
202
|
+
# if item.content_encoded.nil?
|
203
|
+
# puts " using description for content"
|
204
|
+
item_attribs[ :content ] = item.description
|
205
|
+
# end
|
206
|
+
|
207
|
+
item_attribs[ :guid ] = item.guid.content
|
208
|
+
|
209
|
+
puts "- #{item.title}"
|
210
|
+
puts " link (#{item.link})"
|
211
|
+
puts " guid (#{item.guid.content})"
|
212
|
+
puts " pubDate >#{item.pubDate}< >#{item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{item.pubDate.class.name}"
|
213
|
+
puts
|
214
|
+
|
215
|
+
# puts "*** dump item:"
|
216
|
+
# pp item
|
217
|
+
|
218
|
+
item_attribs
|
219
|
+
end
|
220
|
+
|
221
|
+
|
222
|
+
end # class Updater
|
223
|
+
|
224
|
+
end # module Pluto
|
data/lib/pluto/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pluto
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-09-10 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: pakman
|
16
|
-
requirement: &
|
16
|
+
requirement: &83822120 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *83822120
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: fetcher
|
27
|
-
requirement: &
|
27
|
+
requirement: &83821440 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0.3'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *83821440
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: logutils
|
38
|
-
requirement: &
|
38
|
+
requirement: &83818890 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0.6'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *83818890
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: rdoc
|
49
|
-
requirement: &
|
49
|
+
requirement: &83844650 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '3.10'
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *83844650
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: hoe
|
60
|
-
requirement: &
|
60
|
+
requirement: &83842540 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: '3.3'
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *83842540
|
69
69
|
description: pluto - Another Planet Generator (Lets You Build Web Pages from Published
|
70
70
|
Web Feeds)
|
71
71
|
email: webslideshow@googlegroups.com
|
@@ -84,6 +84,7 @@ files:
|
|
84
84
|
- lib/pluto.rb
|
85
85
|
- lib/pluto/cli/opts.rb
|
86
86
|
- lib/pluto/cli/runner.rb
|
87
|
+
- lib/pluto/connecter.rb
|
87
88
|
- lib/pluto/fetcher.rb
|
88
89
|
- lib/pluto/formatter.rb
|
89
90
|
- lib/pluto/models.rb
|
@@ -95,6 +96,7 @@ files:
|
|
95
96
|
- lib/pluto/server/views/debug.erb
|
96
97
|
- lib/pluto/server/views/index.erb
|
97
98
|
- lib/pluto/server/views/layout.erb
|
99
|
+
- lib/pluto/updater.rb
|
98
100
|
- lib/pluto/version.rb
|
99
101
|
- templates/blank.html.erb
|
100
102
|
- templates/blank.top.html.erb
|