pluto 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +2 -0
- data/lib/pluto.rb +24 -1
- data/lib/pluto/cli/opts.rb +1 -1
- data/lib/pluto/cli/runner.rb +4 -16
- data/lib/pluto/connecter.rb +70 -0
- data/lib/pluto/fetcher.rb +6 -163
- data/lib/pluto/models.rb +18 -1
- data/lib/pluto/schema.rb +12 -0
- data/lib/pluto/server.rb +15 -5
- data/lib/pluto/updater.rb +224 -0
- data/lib/pluto/version.rb +1 -1
- metadata +13 -11
data/Manifest.txt
CHANGED
|
@@ -6,6 +6,7 @@ bin/pluto
|
|
|
6
6
|
lib/pluto.rb
|
|
7
7
|
lib/pluto/cli/opts.rb
|
|
8
8
|
lib/pluto/cli/runner.rb
|
|
9
|
+
lib/pluto/connecter.rb
|
|
9
10
|
lib/pluto/fetcher.rb
|
|
10
11
|
lib/pluto/formatter.rb
|
|
11
12
|
lib/pluto/models.rb
|
|
@@ -17,6 +18,7 @@ lib/pluto/server/views/_version.erb
|
|
|
17
18
|
lib/pluto/server/views/debug.erb
|
|
18
19
|
lib/pluto/server/views/index.erb
|
|
19
20
|
lib/pluto/server/views/layout.erb
|
|
21
|
+
lib/pluto/updater.rb
|
|
20
22
|
lib/pluto/version.rb
|
|
21
23
|
templates/blank.html.erb
|
|
22
24
|
templates/blank.top.html.erb
|
data/lib/pluto.rb
CHANGED
|
@@ -6,11 +6,15 @@
|
|
|
6
6
|
# core and stdlibs
|
|
7
7
|
|
|
8
8
|
require 'yaml'
|
|
9
|
+
require 'json'
|
|
10
|
+
require 'uri'
|
|
9
11
|
require 'pp'
|
|
10
12
|
require 'logger'
|
|
11
13
|
require 'optparse'
|
|
12
14
|
require 'fileutils'
|
|
13
15
|
|
|
16
|
+
|
|
17
|
+
|
|
14
18
|
require 'rss'
|
|
15
19
|
|
|
16
20
|
# rubygems
|
|
@@ -28,6 +32,8 @@ require 'pakman' # template pack manager
|
|
|
28
32
|
require 'pluto/version' # let version always get first
|
|
29
33
|
require 'pluto/schema'
|
|
30
34
|
require 'pluto/models'
|
|
35
|
+
require 'pluto/connecter'
|
|
36
|
+
require 'pluto/updater'
|
|
31
37
|
require 'pluto/fetcher'
|
|
32
38
|
require 'pluto/formatter'
|
|
33
39
|
|
|
@@ -44,6 +50,19 @@ module Pluto
|
|
|
44
50
|
"#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}"
|
|
45
51
|
end
|
|
46
52
|
|
|
53
|
+
def self.connect!( config=nil ) # convenience shortcut
|
|
54
|
+
Connecter.new.connect!( config )
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def self.update_subscriptions( config )
|
|
58
|
+
Updater.new.update_subscriptions( config )
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def self.update_feeds
|
|
62
|
+
Updater.new.update_feeds
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
|
|
47
66
|
def self.main
|
|
48
67
|
Runner.new.run(ARGV)
|
|
49
68
|
end
|
|
@@ -51,4 +70,8 @@ module Pluto
|
|
|
51
70
|
end # module Pluto
|
|
52
71
|
|
|
53
72
|
|
|
54
|
-
|
|
73
|
+
if __FILE__ == $0
|
|
74
|
+
Pluto.main
|
|
75
|
+
else
|
|
76
|
+
puts Pluto.banner # say hello
|
|
77
|
+
end
|
data/lib/pluto/cli/opts.rb
CHANGED
data/lib/pluto/cli/runner.rb
CHANGED
|
@@ -5,7 +5,7 @@ class Runner
|
|
|
5
5
|
|
|
6
6
|
include LogUtils::Logging
|
|
7
7
|
|
|
8
|
-
include Models # e.g. Feed,Item,etc.
|
|
8
|
+
include Models # e.g. Feed,Item,Site,etc.
|
|
9
9
|
|
|
10
10
|
def initialize
|
|
11
11
|
@opts = Opts.new
|
|
@@ -37,6 +37,7 @@ class Runner
|
|
|
37
37
|
|
|
38
38
|
cmd.on( "--verbose", "Show debug trace" ) do
|
|
39
39
|
LogUtils::Logger.root.level = :debug
|
|
40
|
+
opts.verbose = true
|
|
40
41
|
end
|
|
41
42
|
|
|
42
43
|
## todo: add/allow -? too
|
|
@@ -68,10 +69,10 @@ EOS
|
|
|
68
69
|
|
|
69
70
|
db_config = {
|
|
70
71
|
adapter: 'sqlite3',
|
|
71
|
-
database: "#{opts.output_path}/#{name}.
|
|
72
|
+
database: "#{opts.output_path}/#{name}.db"
|
|
72
73
|
}
|
|
73
74
|
|
|
74
|
-
|
|
75
|
+
Connecter.new.connect!( db_config )
|
|
75
76
|
|
|
76
77
|
config_path = arg.dup # add .yml file extension if missing (for convenience)
|
|
77
78
|
config_path << '.yml' unless config_path.ends_with?( '.yml' )
|
|
@@ -91,18 +92,5 @@ EOS
|
|
|
91
92
|
end # method run
|
|
92
93
|
|
|
93
94
|
|
|
94
|
-
private
|
|
95
|
-
|
|
96
|
-
def setup_db( db_config )
|
|
97
|
-
puts 'db settings:'
|
|
98
|
-
pp db_config
|
|
99
|
-
|
|
100
|
-
ActiveRecord::Base.establish_connection( db_config )
|
|
101
|
-
|
|
102
|
-
unless Feed.table_exists?
|
|
103
|
-
CreateDb.new.up # run db migratation, that is, create db tables
|
|
104
|
-
end
|
|
105
|
-
end # method setup_db
|
|
106
|
-
|
|
107
95
|
end # class Runner
|
|
108
96
|
end # module Pakman
|
|
data/lib/pluto/connecter.rb
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
module Pluto
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# DB Connecter / Connection Manager
|
|
5
|
+
# lets you establish connection
|
|
6
|
+
|
|
7
|
+
class Connecter
|
|
8
|
+
|
|
9
|
+
include LogUtils::Logging
|
|
10
|
+
|
|
11
|
+
def initialize
|
|
12
|
+
# do nothing for now
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def debug=(value)
|
|
17
|
+
@debug = value
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def debug?
|
|
21
|
+
@debug || false
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def connect!( config = nil )
|
|
26
|
+
|
|
27
|
+
if config.nil? # use DATABASE_URL
|
|
28
|
+
|
|
29
|
+
logger.debug "ENV['DATBASE_URL'] - >#{ENV['DATABASE_URL']}<"
|
|
30
|
+
|
|
31
|
+
db = URI.parse( ENV['DATABASE_URL'] || 'sqlite3:///pluto.db' )
|
|
32
|
+
|
|
33
|
+
if db.scheme == 'postgres'
|
|
34
|
+
config = {
|
|
35
|
+
adapter: 'postgresql',
|
|
36
|
+
host: db.host,
|
|
37
|
+
port: db.port,
|
|
38
|
+
username: db.user,
|
|
39
|
+
password: db.password,
|
|
40
|
+
database: db.path[1..-1],
|
|
41
|
+
encoding: 'utf8'
|
|
42
|
+
}
|
|
43
|
+
else
|
|
44
|
+
config = {
|
|
45
|
+
adapter: db.scheme, # sqlite3
|
|
46
|
+
database: db.path[1..-1] # pluto.db (NB: cut off leading /, thus 1..-1)
|
|
47
|
+
}
|
|
48
|
+
end
|
|
49
|
+
end # if config.nil?
|
|
50
|
+
|
|
51
|
+
puts 'db settings:'
|
|
52
|
+
pp config
|
|
53
|
+
|
|
54
|
+
# for debugging - disable for production use
|
|
55
|
+
if debug?
|
|
56
|
+
ActiveRecord::Base.logger = Logger.new( STDOUT )
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
ActiveRecord::Base.establish_connection( config )
|
|
60
|
+
|
|
61
|
+
# first time? - auto-run db migration, that is, create db tables
|
|
62
|
+
unless Models::Feed.table_exists?
|
|
63
|
+
CreateDb.new.up
|
|
64
|
+
end
|
|
65
|
+
end # method connect!
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
end # class Connecter
|
|
69
|
+
|
|
70
|
+
end # module Pluto
|
data/lib/pluto/fetcher.rb
CHANGED
|
@@ -5,184 +5,27 @@ class Fetcher
|
|
|
5
5
|
|
|
6
6
|
include LogUtils::Logging
|
|
7
7
|
|
|
8
|
-
include Models
|
|
9
|
-
|
|
10
8
|
def initialize( opts, config )
|
|
11
9
|
@opts = opts
|
|
12
10
|
@config = config
|
|
13
|
-
@worker = ::Fetcher::Worker.new
|
|
14
11
|
end
|
|
15
12
|
|
|
16
|
-
attr_reader :opts, :config
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def fetch_feed( url )
|
|
20
|
-
xml = worker.read( url )
|
|
21
|
-
|
|
22
|
-
###
|
|
23
|
-
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
|
24
|
-
# will mostly be ASCII
|
|
25
|
-
# - try to change encoding to UTF-8 ourselves
|
|
26
|
-
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
|
27
|
-
|
|
28
|
-
#####
|
|
29
|
-
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
|
30
|
-
|
|
31
|
-
## NB:
|
|
32
|
-
# for now "hardcoded" to utf8 - what else can we do?
|
|
33
|
-
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
|
34
|
-
xml = xml.force_encoding( Encoding::UTF_8 )
|
|
35
|
-
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
|
36
|
-
xml
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def parse_feed( xml )
|
|
41
|
-
parser = RSS::Parser.new( xml )
|
|
42
|
-
parser.do_validate = false
|
|
43
|
-
parser.ignore_unknown_element = true
|
|
44
|
-
|
|
45
|
-
puts "Parsing feed..."
|
|
46
|
-
feed = parser.parse
|
|
47
|
-
|
|
48
|
-
puts " feed.class=#{feed.class.name}"
|
|
49
|
-
feed
|
|
50
|
-
end
|
|
13
|
+
attr_reader :opts, :config
|
|
51
14
|
|
|
52
15
|
|
|
53
16
|
def run
|
|
54
|
-
logger.debug "RSS::VERSION #{RSS::VERSION}"
|
|
55
|
-
|
|
56
|
-
config[ 'feeds' ].each do |feed_key|
|
|
57
|
-
|
|
58
|
-
feed_hash = config[ feed_key ]
|
|
59
|
-
feed_url = feed_hash[ 'feed_url' ]
|
|
60
|
-
|
|
61
|
-
puts "Fetching feed >#{feed_key}< using >#{feed_url}<..."
|
|
62
|
-
|
|
63
|
-
feed_rec = Feed.find_by_key( feed_key )
|
|
64
|
-
if feed_rec.nil?
|
|
65
|
-
feed_rec = Feed.new
|
|
66
|
-
feed_rec.key = feed_key
|
|
67
|
-
end
|
|
68
|
-
feed_rec.feed_url = feed_url
|
|
69
|
-
feed_rec.url = feed_hash[ 'url' ]
|
|
70
|
-
feed_rec.title = feed_hash[ 'title' ] # todo: use title from feed?
|
|
71
|
-
feed_rec.save!
|
|
72
|
-
|
|
73
|
-
feed_xml = fetch_feed( feed_url )
|
|
74
|
-
|
|
75
|
-
# if opts.verbose? # also write a copy to disk
|
|
76
|
-
# ## fix: use just File.write instead of fetching again
|
|
77
|
-
# worker.copy( feed_url, "./#{feed_key}.xml" )
|
|
78
|
-
# end
|
|
79
|
-
|
|
80
|
-
# xml = File.read( "./#{feed_key}.xml" )
|
|
81
|
-
|
|
82
|
-
puts "Before parsing feed >#{feed_key}<..."
|
|
83
17
|
|
|
84
|
-
|
|
18
|
+
updater = Updater.new
|
|
85
19
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
else ## assume RSS::Rss::Feed
|
|
89
|
-
puts "== #{feed.channel.title} =="
|
|
90
|
-
end
|
|
20
|
+
# pass along debug/verbose setting/switch
|
|
21
|
+
updater.debug = true if opts.verbose?
|
|
91
22
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
item_attribs = handle_feed_item_atom( item )
|
|
95
|
-
else ## assume RSS::Rss::Feed
|
|
96
|
-
item_attribs = handle_feed_item_rss( item )
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
# add feed_id fk_ref
|
|
100
|
-
item_attribs[ :feed_id ] = feed_rec.id
|
|
101
|
-
|
|
102
|
-
rec = Item.find_by_guid( item_attribs[ :guid ] )
|
|
103
|
-
if rec.nil?
|
|
104
|
-
rec = Item.new
|
|
105
|
-
puts "** NEW"
|
|
106
|
-
else
|
|
107
|
-
puts "UPDATE"
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
rec.update_attributes!( item_attribs )
|
|
111
|
-
end # each item
|
|
112
|
-
|
|
113
|
-
end # each feed
|
|
23
|
+
updater.update_subscriptions( config )
|
|
24
|
+
updater.update_feeds
|
|
114
25
|
|
|
115
26
|
end # method run
|
|
116
27
|
|
|
117
28
|
|
|
118
|
-
def handle_feed_item_atom( item )
|
|
119
|
-
|
|
120
|
-
## todo: if content.content empty use summary for example
|
|
121
|
-
item_attribs = {
|
|
122
|
-
title: item.title.content,
|
|
123
|
-
url: item.link.href,
|
|
124
|
-
published_at: item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" ),
|
|
125
|
-
# content: item.content.content,
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
item_attribs[ :guid ] = item.id.content
|
|
129
|
-
|
|
130
|
-
if item.summary
|
|
131
|
-
item_attribs[ :content ] = item.summary.content
|
|
132
|
-
else
|
|
133
|
-
if item.content
|
|
134
|
-
text = item.content.content.dup
|
|
135
|
-
## strip all html tags
|
|
136
|
-
text = text.gsub( /<[^>]+>/, '' )
|
|
137
|
-
text = text[ 0..400 ] # get first 400 chars
|
|
138
|
-
## todo: check for length if > 400 add ... at the end???
|
|
139
|
-
item_attribs[ :content ] = text
|
|
140
|
-
end
|
|
141
|
-
end
|
|
142
|
-
|
|
143
|
-
puts "- #{item.title.content}"
|
|
144
|
-
puts " link >#{item.link.href}<"
|
|
145
|
-
puts " id (~guid) >#{item.id.content}<"
|
|
146
|
-
|
|
147
|
-
### todo: use/try published first? why? why not?
|
|
148
|
-
puts " updated (~pubDate) >#{item.updated.content}< >#{item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{item.updated.content.class.name}"
|
|
149
|
-
puts
|
|
150
|
-
|
|
151
|
-
# puts "*** dump item:"
|
|
152
|
-
# pp item
|
|
153
|
-
|
|
154
|
-
item_attribs
|
|
155
|
-
end
|
|
156
|
-
|
|
157
|
-
def handle_feed_item_rss( item )
|
|
158
|
-
|
|
159
|
-
item_attribs = {
|
|
160
|
-
title: item.title,
|
|
161
|
-
url: item.link,
|
|
162
|
-
published_at: item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" ),
|
|
163
|
-
# content: item.content_encoded,
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
# if item.content_encoded.nil?
|
|
167
|
-
# puts " using description for content"
|
|
168
|
-
item_attribs[ :content ] = item.description
|
|
169
|
-
# end
|
|
170
|
-
|
|
171
|
-
item_attribs[ :guid ] = item.guid.content
|
|
172
|
-
|
|
173
|
-
puts "- #{item.title}"
|
|
174
|
-
puts " link (#{item.link})"
|
|
175
|
-
puts " guid (#{item.guid.content})"
|
|
176
|
-
puts " pubDate >#{item.pubDate}< >#{item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{item.pubDate.class.name}"
|
|
177
|
-
puts
|
|
178
|
-
|
|
179
|
-
# puts "*** dump item:"
|
|
180
|
-
# pp item
|
|
181
|
-
|
|
182
|
-
item_attribs
|
|
183
|
-
end
|
|
184
|
-
|
|
185
|
-
|
|
186
29
|
end # class Fetcher
|
|
187
30
|
|
|
188
31
|
end # module Pluto
|
data/lib/pluto/models.rb
CHANGED
|
@@ -5,6 +5,8 @@ class Feed < ActiveRecord::Base
|
|
|
5
5
|
self.table_name = 'feeds'
|
|
6
6
|
|
|
7
7
|
has_many :items
|
|
8
|
+
has_many :subscriptions
|
|
9
|
+
has_many :sites, :through => :subscriptions
|
|
8
10
|
end
|
|
9
11
|
|
|
10
12
|
class Item < ActiveRecord::Base
|
|
@@ -13,9 +15,24 @@ class Item < ActiveRecord::Base
|
|
|
13
15
|
belongs_to :feed
|
|
14
16
|
|
|
15
17
|
def self.latest
|
|
16
|
-
|
|
18
|
+
order( 'published_at desc' )
|
|
17
19
|
end
|
|
18
20
|
end
|
|
19
21
|
|
|
22
|
+
class Site < ActiveRecord::Base
|
|
23
|
+
self.table_name = 'sites'
|
|
24
|
+
|
|
25
|
+
has_many :subscriptions
|
|
26
|
+
has_many :feeds, :through => :subscriptions
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
class Subscription < ActiveRecord::Base
|
|
30
|
+
self.table_name = 'subscriptions'
|
|
31
|
+
|
|
32
|
+
belongs_to :site
|
|
33
|
+
belongs_to :feed
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
|
|
20
37
|
end # module Models
|
|
21
38
|
end # module Pluto
|
data/lib/pluto/schema.rb
CHANGED
|
@@ -4,6 +4,18 @@ module Pluto
|
|
|
4
4
|
class CreateDb < ActiveRecord::Migration
|
|
5
5
|
|
|
6
6
|
def up
|
|
7
|
+
create_table :sites do |t|
|
|
8
|
+
t.string :title, :null => false # e.g Planet Ruby, Planet JavaScript, etc.
|
|
9
|
+
t.string :key, :null => false # e.g. ruby, js, etc.
|
|
10
|
+
t.timestamps
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
create_table :subscriptions do |t| # has_many join table (sites/feeds)
|
|
14
|
+
t.references :site, :null => false
|
|
15
|
+
t.references :feed, :null => false
|
|
16
|
+
t.timestamps
|
|
17
|
+
end
|
|
18
|
+
|
|
7
19
|
create_table :feeds do |t|
|
|
8
20
|
t.string :title, :null => false
|
|
9
21
|
t.string :url, :null => false
|
data/lib/pluto/server.rb
CHANGED
|
@@ -31,19 +31,29 @@ class Server < Sinatra::Base
|
|
|
31
31
|
set :static, true # set up static file routing
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
set :site_config, {} # empty hash by default; use site_config
|
|
35
|
-
|
|
36
34
|
#######################
|
|
37
35
|
# Models
|
|
38
36
|
|
|
39
|
-
include Models # e.g. Feed, Item, etc.
|
|
40
|
-
|
|
37
|
+
include Models # e.g. Feed, Item, Site, etc.
|
|
38
|
+
|
|
39
|
+
#################
|
|
40
|
+
# Utilities
|
|
41
|
+
|
|
42
|
+
def site_config_hash
|
|
43
|
+
h = {}
|
|
44
|
+
site = Site.first # FIX: for now assume one planet per DB (fix later; allow planet key or similar)
|
|
45
|
+
if site.present?
|
|
46
|
+
h['title'] = site.title
|
|
47
|
+
else
|
|
48
|
+
h['title'] = 'Planet Untitled'
|
|
49
|
+
end
|
|
50
|
+
end
|
|
41
51
|
|
|
42
52
|
##############################################
|
|
43
53
|
# Controllers / Routing / Request Handlers
|
|
44
54
|
|
|
45
55
|
get '/' do
|
|
46
|
-
erb :index, locals: { site:
|
|
56
|
+
erb :index, locals: { site: site_config_hash }
|
|
47
57
|
end
|
|
48
58
|
|
|
49
59
|
# todo/fix: make a generic route for erb w /regex
|
|
data/lib/pluto/updater.rb
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
module Pluto
|
|
2
|
+
|
|
3
|
+
class Updater
|
|
4
|
+
|
|
5
|
+
include LogUtils::Logging
|
|
6
|
+
|
|
7
|
+
include Models
|
|
8
|
+
|
|
9
|
+
def initialize
|
|
10
|
+
@worker = ::Fetcher::Worker.new
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
attr_reader :worker
|
|
14
|
+
|
|
15
|
+
def debug=(value)
|
|
16
|
+
@debug = value
|
|
17
|
+
### logger.debug "[Updater] setting debug flag - debug? #{debug?}"
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def debug?
|
|
21
|
+
@debug || false
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def fetch_feed( url )
|
|
25
|
+
xml = worker.read( url )
|
|
26
|
+
|
|
27
|
+
###
|
|
28
|
+
# NB: Net::HTTP will NOT set encoding UTF-8 etc.
|
|
29
|
+
# will mostly be ASCII
|
|
30
|
+
# - try to change encoding to UTF-8 ourselves
|
|
31
|
+
logger.debug "xml.encoding.name (before): #{xml.encoding.name}"
|
|
32
|
+
|
|
33
|
+
#####
|
|
34
|
+
# NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
|
|
35
|
+
|
|
36
|
+
## NB:
|
|
37
|
+
# for now "hardcoded" to utf8 - what else can we do?
|
|
38
|
+
# - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
|
|
39
|
+
xml = xml.force_encoding( Encoding::UTF_8 )
|
|
40
|
+
logger.debug "xml.encoding.name (after): #{xml.encoding.name}"
|
|
41
|
+
xml
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def parse_feed( xml )
|
|
46
|
+
parser = RSS::Parser.new( xml )
|
|
47
|
+
parser.do_validate = false
|
|
48
|
+
parser.ignore_unknown_element = true
|
|
49
|
+
|
|
50
|
+
puts "Parsing feed..."
|
|
51
|
+
feed = parser.parse
|
|
52
|
+
|
|
53
|
+
puts " feed.class=#{feed.class.name}"
|
|
54
|
+
feed
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def update_subscriptions( config, opts={} )
|
|
59
|
+
|
|
60
|
+
## for now - use single site w/ key planet -- fix!! allow multiple sites (planets)
|
|
61
|
+
|
|
62
|
+
site_key = 'planet'
|
|
63
|
+
site_rec = Site.find_by_key( site_key )
|
|
64
|
+
if site_rec.nil?
|
|
65
|
+
site_rec = Site.new
|
|
66
|
+
site_rec.key = site_key
|
|
67
|
+
end
|
|
68
|
+
site_rec.title = config[ 'title' ]
|
|
69
|
+
site_rec.save!
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
config[ 'feeds' ].each do |feed_key|
|
|
73
|
+
|
|
74
|
+
feed_hash = config[ feed_key ]
|
|
75
|
+
feed_url = feed_hash[ 'feed_url' ]
|
|
76
|
+
|
|
77
|
+
puts "Updating feed subscription >#{feed_key}< - >#{feed_url}<..."
|
|
78
|
+
|
|
79
|
+
feed_rec = Feed.find_by_key( feed_key )
|
|
80
|
+
if feed_rec.nil?
|
|
81
|
+
feed_rec = Feed.new
|
|
82
|
+
feed_rec.key = feed_key
|
|
83
|
+
end
|
|
84
|
+
feed_rec.feed_url = feed_url
|
|
85
|
+
feed_rec.url = feed_hash[ 'url' ]
|
|
86
|
+
feed_rec.title = feed_hash[ 'title' ] # todo: use title from feed?
|
|
87
|
+
feed_rec.save!
|
|
88
|
+
|
|
89
|
+
## todo:
|
|
90
|
+
# add subscription records (feed,site) - how?
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
end # method update_subscriptions
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def update_feeds( opts={} )
|
|
97
|
+
|
|
98
|
+
logger.debug "using stdlib RSS::VERSION #{RSS::VERSION}"
|
|
99
|
+
|
|
100
|
+
Feed.all.each do |feed_rec|
|
|
101
|
+
|
|
102
|
+
feed_key = feed_rec.key
|
|
103
|
+
feed_url = feed_rec.feed_url
|
|
104
|
+
|
|
105
|
+
feed_xml = fetch_feed( feed_url )
|
|
106
|
+
|
|
107
|
+
logger.debug "feed_xml:"
|
|
108
|
+
logger.debug feed_xml[ 0..300 ] # get first 300 chars
|
|
109
|
+
|
|
110
|
+
# if opts.verbose? # also write a copy to disk
|
|
111
|
+
if debug?
|
|
112
|
+
logger.debug "saving feed to >./#{feed_key}.xml<..."
|
|
113
|
+
File.open( "./#{feed_key}.xml", 'w' ) do |f|
|
|
114
|
+
f.write( feed_xml )
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
puts "Before parsing feed >#{feed_key}<..."
|
|
119
|
+
|
|
120
|
+
feed = parse_feed( feed_xml )
|
|
121
|
+
|
|
122
|
+
if feed.class == RSS::Atom::Feed
|
|
123
|
+
puts "== #{feed.title.content} =="
|
|
124
|
+
else ## assume RSS::Rss::Feed
|
|
125
|
+
puts "== #{feed.channel.title} =="
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
feed.items.each do |item|
|
|
129
|
+
if feed.class == RSS::Atom::Feed
|
|
130
|
+
item_attribs = handle_feed_item_atom( item )
|
|
131
|
+
else ## assume RSS::Rss::Feed
|
|
132
|
+
item_attribs = handle_feed_item_rss( item )
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# add feed_id fk_ref
|
|
136
|
+
item_attribs[ :feed_id ] = feed_rec.id
|
|
137
|
+
|
|
138
|
+
rec = Item.find_by_guid( item_attribs[ :guid ] )
|
|
139
|
+
if rec.nil?
|
|
140
|
+
rec = Item.new
|
|
141
|
+
puts "** NEW"
|
|
142
|
+
else
|
|
143
|
+
puts "UPDATE"
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
rec.update_attributes!( item_attribs )
|
|
147
|
+
end # each item
|
|
148
|
+
|
|
149
|
+
end # each feed
|
|
150
|
+
|
|
151
|
+
end # method update_feeds
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def handle_feed_item_atom( item )
|
|
155
|
+
|
|
156
|
+
## todo: if content.content empty use summary for example
|
|
157
|
+
item_attribs = {
|
|
158
|
+
title: item.title.content,
|
|
159
|
+
url: item.link.href,
|
|
160
|
+
published_at: item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" ),
|
|
161
|
+
# content: item.content.content,
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
item_attribs[ :guid ] = item.id.content
|
|
165
|
+
|
|
166
|
+
if item.summary
|
|
167
|
+
item_attribs[ :content ] = item.summary.content
|
|
168
|
+
else
|
|
169
|
+
if item.content
|
|
170
|
+
text = item.content.content.dup
|
|
171
|
+
## strip all html tags
|
|
172
|
+
text = text.gsub( /<[^>]+>/, '' )
|
|
173
|
+
text = text[ 0..400 ] # get first 400 chars
|
|
174
|
+
## todo: check for length if > 400 add ... at the end???
|
|
175
|
+
item_attribs[ :content ] = text
|
|
176
|
+
end
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
puts "- #{item.title.content}"
|
|
180
|
+
puts " link >#{item.link.href}<"
|
|
181
|
+
puts " id (~guid) >#{item.id.content}<"
|
|
182
|
+
|
|
183
|
+
### todo: use/try published first? why? why not?
|
|
184
|
+
puts " updated (~pubDate) >#{item.updated.content}< >#{item.updated.content.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{item.updated.content.class.name}"
|
|
185
|
+
puts
|
|
186
|
+
|
|
187
|
+
# puts "*** dump item:"
|
|
188
|
+
# pp item
|
|
189
|
+
|
|
190
|
+
item_attribs
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
def handle_feed_item_rss( item )
|
|
194
|
+
|
|
195
|
+
item_attribs = {
|
|
196
|
+
title: item.title,
|
|
197
|
+
url: item.link,
|
|
198
|
+
published_at: item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" ),
|
|
199
|
+
# content: item.content_encoded,
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
# if item.content_encoded.nil?
|
|
203
|
+
# puts " using description for content"
|
|
204
|
+
item_attribs[ :content ] = item.description
|
|
205
|
+
# end
|
|
206
|
+
|
|
207
|
+
item_attribs[ :guid ] = item.guid.content
|
|
208
|
+
|
|
209
|
+
puts "- #{item.title}"
|
|
210
|
+
puts " link (#{item.link})"
|
|
211
|
+
puts " guid (#{item.guid.content})"
|
|
212
|
+
puts " pubDate >#{item.pubDate}< >#{item.pubDate.utc.strftime( "%Y-%m-%d %H:%M" )}< : #{item.pubDate.class.name}"
|
|
213
|
+
puts
|
|
214
|
+
|
|
215
|
+
# puts "*** dump item:"
|
|
216
|
+
# pp item
|
|
217
|
+
|
|
218
|
+
item_attribs
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
end # class Updater
|
|
223
|
+
|
|
224
|
+
end # module Pluto
|
data/lib/pluto/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pluto
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.0
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -13,7 +13,7 @@ date: 2013-09-10 00:00:00.000000000 Z
|
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: pakman
|
|
16
|
-
requirement: &
|
|
16
|
+
requirement: &83822120 !ruby/object:Gem::Requirement
|
|
17
17
|
none: false
|
|
18
18
|
requirements:
|
|
19
19
|
- - ! '>='
|
|
@@ -21,10 +21,10 @@ dependencies:
|
|
|
21
21
|
version: '0.5'
|
|
22
22
|
type: :runtime
|
|
23
23
|
prerelease: false
|
|
24
|
-
version_requirements: *
|
|
24
|
+
version_requirements: *83822120
|
|
25
25
|
- !ruby/object:Gem::Dependency
|
|
26
26
|
name: fetcher
|
|
27
|
-
requirement: &
|
|
27
|
+
requirement: &83821440 !ruby/object:Gem::Requirement
|
|
28
28
|
none: false
|
|
29
29
|
requirements:
|
|
30
30
|
- - ! '>='
|
|
@@ -32,10 +32,10 @@ dependencies:
|
|
|
32
32
|
version: '0.3'
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
|
-
version_requirements: *
|
|
35
|
+
version_requirements: *83821440
|
|
36
36
|
- !ruby/object:Gem::Dependency
|
|
37
37
|
name: logutils
|
|
38
|
-
requirement: &
|
|
38
|
+
requirement: &83818890 !ruby/object:Gem::Requirement
|
|
39
39
|
none: false
|
|
40
40
|
requirements:
|
|
41
41
|
- - ! '>='
|
|
@@ -43,10 +43,10 @@ dependencies:
|
|
|
43
43
|
version: '0.6'
|
|
44
44
|
type: :runtime
|
|
45
45
|
prerelease: false
|
|
46
|
-
version_requirements: *
|
|
46
|
+
version_requirements: *83818890
|
|
47
47
|
- !ruby/object:Gem::Dependency
|
|
48
48
|
name: rdoc
|
|
49
|
-
requirement: &
|
|
49
|
+
requirement: &83844650 !ruby/object:Gem::Requirement
|
|
50
50
|
none: false
|
|
51
51
|
requirements:
|
|
52
52
|
- - ~>
|
|
@@ -54,10 +54,10 @@ dependencies:
|
|
|
54
54
|
version: '3.10'
|
|
55
55
|
type: :development
|
|
56
56
|
prerelease: false
|
|
57
|
-
version_requirements: *
|
|
57
|
+
version_requirements: *83844650
|
|
58
58
|
- !ruby/object:Gem::Dependency
|
|
59
59
|
name: hoe
|
|
60
|
-
requirement: &
|
|
60
|
+
requirement: &83842540 !ruby/object:Gem::Requirement
|
|
61
61
|
none: false
|
|
62
62
|
requirements:
|
|
63
63
|
- - ~>
|
|
@@ -65,7 +65,7 @@ dependencies:
|
|
|
65
65
|
version: '3.3'
|
|
66
66
|
type: :development
|
|
67
67
|
prerelease: false
|
|
68
|
-
version_requirements: *
|
|
68
|
+
version_requirements: *83842540
|
|
69
69
|
description: pluto - Another Planet Generator (Lets You Build Web Pages from Published
|
|
70
70
|
Web Feeds)
|
|
71
71
|
email: webslideshow@googlegroups.com
|
|
@@ -84,6 +84,7 @@ files:
|
|
|
84
84
|
- lib/pluto.rb
|
|
85
85
|
- lib/pluto/cli/opts.rb
|
|
86
86
|
- lib/pluto/cli/runner.rb
|
|
87
|
+
- lib/pluto/connecter.rb
|
|
87
88
|
- lib/pluto/fetcher.rb
|
|
88
89
|
- lib/pluto/formatter.rb
|
|
89
90
|
- lib/pluto/models.rb
|
|
@@ -95,6 +96,7 @@ files:
|
|
|
95
96
|
- lib/pluto/server/views/debug.erb
|
|
96
97
|
- lib/pluto/server/views/index.erb
|
|
97
98
|
- lib/pluto/server/views/layout.erb
|
|
99
|
+
- lib/pluto/updater.rb
|
|
98
100
|
- lib/pluto/version.rb
|
|
99
101
|
- templates/blank.html.erb
|
|
100
102
|
- templates/blank.top.html.erb
|