bwkfanboy 0.1.3 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +8 -4
- data/Rakefile +9 -10
- data/bin/bwkfanboy +30 -8
- data/bin/bwkfanboy_fetch +6 -23
- data/bin/bwkfanboy_generate +7 -62
- data/bin/bwkfanboy_parse +3 -9
- data/bin/bwkfanboy_server +9 -36
- data/doc/NEWS.rdoc +16 -2
- data/doc/README.rdoc +8 -4
- data/doc/bwkfanboy_fetch.rdoc +4 -0
- data/doc/bwkfanboy_generate.rdoc +7 -0
- data/doc/bwkfanboy_parse.rdoc +7 -0
- data/doc/bwkfanboy_server.rdoc +35 -0
- data/doc/plugin.rdoc +32 -29
- data/lib/bwkfanboy/fetch.rb +36 -0
- data/lib/bwkfanboy/generate.rb +63 -0
- data/lib/bwkfanboy/parser.rb +4 -4
- data/lib/bwkfanboy/plugins/bwk.rb +3 -3
- data/lib/bwkfanboy/plugins/freebsd-ports-update.rb +3 -3
- data/lib/bwkfanboy/plugins/quora.rb +3 -3
- data/lib/bwkfanboy/utils.rb +2 -2
- data/test/test_fetch.rb +1 -4
- data/test/test_generate.rb +0 -3
- data/test/test_parse.rb +4 -7
- data/test/test_server.rb +0 -3
- data/test/ts_utils.rb +8 -0
- metadata +30 -19
- data/test/plugins/bwk.rb +0 -29
data/README.rdoc
CHANGED
@@ -11,6 +11,10 @@ general assistance.
|
|
11
11
|
than the whole gem on rubygems.org, so grab the source before
|
12
12
|
struggling).
|
13
13
|
|
14
|
+
Plugins from version 1.1.4 are *incompatible* with the 0.1.x
|
15
|
+
series. Please reread in doc/plugin.rdoc the example of the skeleton
|
16
|
+
plugin.
|
17
|
+
|
14
18
|
= Architecture
|
15
19
|
|
16
20
|
== Plugins
|
@@ -18,10 +22,10 @@ struggling).
|
|
18
22
|
bwkfanboy comes with several plugins. One of them, for example, parses a
|
19
23
|
search page of dailyprincetonian.com looking for bwk's articles.
|
20
24
|
|
21
|
-
The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse
|
22
|
-
|
25
|
+
The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse parent,
|
26
|
+
overriding 1 method.
|
23
27
|
|
24
|
-
|
28
|
+
Plugins can be in system
|
25
29
|
|
26
30
|
`gem env gemdir`/gems/bwkfanboy-x.y.z/lib/bwkfanboy/plugins
|
27
31
|
|
@@ -89,7 +93,7 @@ There are 2 method to get an Atom feed via HTTP:
|
|
89
93
|
|
90
94
|
2. Small *bwkfanboy_server* HTTP server. It can run from any user and
|
91
95
|
thus is able to inherit env variables for discovering your HOME
|
92
|
-
directory. Read
|
96
|
+
directory. Read doc/bwkfanboy_server.rdoc to know how to operate it.
|
93
97
|
|
94
98
|
= License
|
95
99
|
:include: doc/LICENSE
|
data/Rakefile
CHANGED
@@ -9,12 +9,12 @@ require 'rake/testtask'
|
|
9
9
|
spec = Gem::Specification.new() {|i|
|
10
10
|
i.name = "bwkfanboy"
|
11
11
|
i.summary = 'A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.'
|
12
|
-
i.version =
|
12
|
+
i.version = `bin/#{i.name} -V`
|
13
13
|
i.author = 'Alexander Gromnitsky'
|
14
14
|
i.email = 'alexander.gromnitsky@gmail.com'
|
15
|
-
i.homepage =
|
15
|
+
i.homepage = "http://github.com/gromnitsky/#{i.name}"
|
16
16
|
i.platform = Gem::Platform::RUBY
|
17
|
-
i.required_ruby_version = '>= 1.9'
|
17
|
+
i.required_ruby_version = '>= 1.9.2'
|
18
18
|
i.files = FileList['lib/**/*', 'bin/*', 'doc/*', '[A-Z]*', 'test/**/*']
|
19
19
|
|
20
20
|
i.executables = FileList['bin/*'].gsub(/^bin\//, '')
|
@@ -22,10 +22,10 @@ spec = Gem::Specification.new() {|i|
|
|
22
22
|
|
23
23
|
i.test_files = FileList['test/test_*.rb']
|
24
24
|
|
25
|
-
i.rdoc_options << '-m' << '
|
26
|
-
i.extra_rdoc_files = FileList['
|
25
|
+
i.rdoc_options << '-m' << 'doc/README.rdoc' << '-x' << 'plugins'
|
26
|
+
i.extra_rdoc_files = FileList['doc/*']
|
27
27
|
|
28
|
-
i.add_dependency('activesupport', '>= 3.0.
|
28
|
+
i.add_dependency('activesupport', '>= 3.0.1')
|
29
29
|
i.add_dependency('nokogiri', '>= 1.4.3')
|
30
30
|
i.add_dependency('open4', '>= 1.0.1')
|
31
31
|
i.add_dependency('jsonschema', '>= 2.0.0')
|
@@ -36,12 +36,11 @@ Rake::GemPackageTask.new(spec).define()
|
|
36
36
|
task(default: %(repackage))
|
37
37
|
|
38
38
|
Rake::RDocTask.new('doc') {|i|
|
39
|
-
i.main =
|
40
|
-
i.rdoc_files = FileList['doc/*', 'lib/**/*.rb'
|
41
|
-
i.rdoc_files.exclude("lib/**/plugins"
|
39
|
+
i.main = 'doc/README.rdoc'
|
40
|
+
i.rdoc_files = FileList['doc/*', 'lib/**/*.rb']
|
41
|
+
i.rdoc_files.exclude("lib/**/plugins")
|
42
42
|
}
|
43
43
|
|
44
44
|
Rake::TestTask.new() {|i|
|
45
45
|
i.test_files = FileList['test/test_*.rb']
|
46
|
-
i.verbose = true
|
47
46
|
}
|
data/bin/bwkfanboy
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# -*-ruby-*-
|
3
3
|
|
4
4
|
# This program is executed by bin/bwkfanboy_server to do all dirty work:
|
@@ -11,13 +11,14 @@
|
|
11
11
|
#
|
12
12
|
# % bwkfanboy -h
|
13
13
|
#
|
14
|
-
# to get some basic help
|
14
|
+
# to get some basic help--read about Bwkfanboy module.
|
15
15
|
|
16
16
|
require 'shellwords'
|
17
17
|
require_relative '../lib/bwkfanboy/parser'
|
18
18
|
|
19
19
|
$conf = {
|
20
|
-
mode: '
|
20
|
+
mode: 'fast',
|
21
|
+
debug: false,
|
21
22
|
banner: "Usage: #{File.basename($0)} [options] plugin-name"
|
22
23
|
}
|
23
24
|
|
@@ -87,7 +88,8 @@ o = Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner]) # create OptionParser object
|
|
87
88
|
o.on('-i', 'Show some info about the plugin') { |i| $conf[:mode] = 'info' }
|
88
89
|
o.on('-l', 'List all plugins') { |i| $conf[:mode] = 'list' }
|
89
90
|
o.on('-p', 'List all plugins paths') { |i| $conf[:mode] = 'path' }
|
90
|
-
o.on('-
|
91
|
+
o.on('-O', '(ignore this) Execute all bwkfanboy_* utils in a pipe') { |i| $conf[:mode] = 'pipe' }
|
92
|
+
o.on('-D', '(ignore this) Use URI_DEBUG const instead URI in plugins') { |i| $conf[:debug] = true }
|
91
93
|
Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], o) # run cl parser
|
92
94
|
|
93
95
|
plugin = Plugin.new(ARGV[0])
|
@@ -107,12 +109,11 @@ when 'info'
|
|
107
109
|
plugin.load(opt).dump_info
|
108
110
|
when 'version'
|
109
111
|
puts Bwkfanboy::Meta::VERSION
|
110
|
-
|
111
|
-
# A pipe mode
|
112
|
+
when 'pipe'
|
112
113
|
pn = plugin.load(opt)
|
113
114
|
cmd = "./bwkfanboy_fetch | ./bwkfanboy_parse '#{plugin.path}' #{opt.size != 0 ? Shellwords.join(opt) : ''} | ./bwkfanboy_generate"
|
114
115
|
if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
|
115
|
-
puts (
|
116
|
+
puts (!$conf[:debug] ? pn.uri() : pn.class::Meta::URI_DEBUG)
|
116
117
|
puts cmd
|
117
118
|
exit 0
|
118
119
|
end
|
@@ -121,10 +122,31 @@ else
|
|
121
122
|
Dir.chdir(File.dirname(File.expand_path($0)))
|
122
123
|
|
123
124
|
pipe = IO.popen(cmd, 'w+')
|
124
|
-
pipe.puts (
|
125
|
+
pipe.puts (!$conf[:debug] ? pn.uri : pn.class::Meta::URI_DEBUG)
|
125
126
|
pipe.close_write
|
126
127
|
while line = pipe.gets
|
127
128
|
puts line
|
128
129
|
end
|
129
130
|
pipe.close
|
131
|
+
|
132
|
+
else
|
133
|
+
# a 'new', faster way
|
134
|
+
pn = plugin.load(opt)
|
135
|
+
|
136
|
+
require_relative '../lib/bwkfanboy/fetch'
|
137
|
+
require_relative '../lib/bwkfanboy/generate'
|
138
|
+
|
139
|
+
# 1. fetch & parse
|
140
|
+
pn = Page.new(opt)
|
141
|
+
Bwkfanboy::Fetch.cat(!$conf[:debug] ? pn.uri() : pn.class::Meta::URI_DEBUG) {|stream|
|
142
|
+
pn.parse(stream)
|
143
|
+
}
|
144
|
+
|
145
|
+
if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
|
146
|
+
pn.dump()
|
147
|
+
exit 0
|
148
|
+
end
|
149
|
+
|
150
|
+
# 2.generate
|
151
|
+
puts Bwkfanboy::Generate.atom(JSON.parse(pn.to_json))
|
130
152
|
end
|
data/bin/bwkfanboy_fetch
CHANGED
@@ -1,30 +1,13 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# -*-ruby-*-
|
3
3
|
|
4
|
-
|
5
|
-
# read for the local file) and print to stdout.
|
6
|
-
|
7
|
-
require 'open-uri'
|
8
|
-
|
9
|
-
require_relative '../lib/bwkfanboy/utils'
|
4
|
+
require_relative '../lib/bwkfanboy/fetch'
|
10
5
|
|
11
6
|
$conf = { banner: "Usage: #{File.basename($0)} [options] < uri" }
|
12
7
|
|
13
8
|
Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], nil, true)
|
14
9
|
|
15
|
-
uri = gets
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
begin
|
20
|
-
open(uri, "User-Agent" => Bwkfanboy::Meta::USER_AGENT) {|f|
|
21
|
-
if defined?(f.meta) && f.status[0] != '200' then
|
22
|
-
Bwkfanboy::Utils.errx(1, "cannot fetch #{uri} : HTTP responce: #{f.status[0]}")
|
23
|
-
end
|
24
|
-
Bwkfanboy::Utils.veputs(1, "charset = #{f.content_type_parse[1][1]}\n") if defined?(f.meta)
|
25
|
-
f.each_line {|i| puts i}
|
26
|
-
}
|
27
|
-
rescue
|
28
|
-
# typically Errno::ENOENT
|
29
|
-
Bwkfanboy::Utils.errx(1, "cannot fetch: #{$!}");
|
30
|
-
end
|
10
|
+
uri = gets
|
11
|
+
Bwkfanboy::Fetch.cat(uri) {|f|
|
12
|
+
f.each_line {|line| puts line }
|
13
|
+
}
|
data/bin/bwkfanboy_generate
CHANGED
@@ -1,24 +1,16 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# -*-ruby-*-
|
3
3
|
|
4
|
-
|
5
|
-
# result to stdout in UTF-8.
|
6
|
-
#
|
7
|
-
# One can validate the JSON by providing '--check' command line option
|
8
|
-
# (by default the validating is off).
|
9
|
-
|
10
|
-
require 'rss/maker'
|
11
|
-
require 'date'
|
12
|
-
require 'json'
|
13
|
-
require 'jsonschema'
|
14
|
-
|
15
|
-
require_relative '../lib/bwkfanboy/utils'
|
4
|
+
require_relative '../lib/bwkfanboy/generate'
|
16
5
|
|
17
6
|
$conf = {
|
18
7
|
banner: "Usage: #{File.basename($0)} [options] < json",
|
19
8
|
check: false
|
20
9
|
}
|
21
10
|
|
11
|
+
# we are expecting the input ONLY in UTF-8
|
12
|
+
Encoding.default_external = 'UTF-8'
|
13
|
+
|
22
14
|
o = Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner])
|
23
15
|
o.on('--check', 'Validate the input (slow!)') { |i| $conf[:check] = true }
|
24
16
|
Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], o) # run cl parser
|
@@ -29,52 +21,5 @@ rescue
|
|
29
21
|
Bwkfanboy::Utils.errx(1, "stdin had invalid JSON");
|
30
22
|
end
|
31
23
|
|
32
|
-
|
33
|
-
|
34
|
-
if $conf[:check] then
|
35
|
-
begin
|
36
|
-
JSON::Schema.validate(j, JSON.parse(File.read(schema)))
|
37
|
-
rescue
|
38
|
-
Bwkfanboy::Utils.errx(1, "JSON validation with schema (#{schema}) failed");
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
feed = RSS::Maker.make("atom") { |maker|
|
43
|
-
maker.channel.id = j['channel']['id']
|
44
|
-
maker.channel.updated = j['channel']['updated']
|
45
|
-
maker.channel.author = j['channel']['author']
|
46
|
-
maker.channel.title = j['channel']['title']
|
47
|
-
|
48
|
-
maker.channel.links.new_link {|i|
|
49
|
-
i.href = j['channel']['link']
|
50
|
-
i.rel = 'alternate'
|
51
|
-
i.type = 'text/html' # eh
|
52
|
-
}
|
53
|
-
|
54
|
-
maker.items.do_sort = true
|
55
|
-
|
56
|
-
j['x_entries'].each { |i|
|
57
|
-
maker.items.new_item do |item|
|
58
|
-
item.links.new_link {|k|
|
59
|
-
k.href = i['link']
|
60
|
-
k.rel = 'alternate'
|
61
|
-
k.type = 'text/html' # only to make happy crappy pr2nntp gateway
|
62
|
-
}
|
63
|
-
item.title = i['title']
|
64
|
-
item.author = i['author']
|
65
|
-
item.updated = i['updated']
|
66
|
-
item.content.type = j['channel']['x_entries_content_type']
|
67
|
-
|
68
|
-
case item.content.type
|
69
|
-
when 'text'
|
70
|
-
item.content.content = i['content']
|
71
|
-
when 'html'
|
72
|
-
item.content.content = i['content']
|
73
|
-
else
|
74
|
-
item.content.xhtml = i['content']
|
75
|
-
end
|
76
|
-
end
|
77
|
-
}
|
78
|
-
}
|
79
|
-
|
80
|
-
puts feed
|
24
|
+
if $conf[:check] then Bwkfanboy::Generate.validate(j) end
|
25
|
+
puts Bwkfanboy::Generate.atom(j)
|
data/bin/bwkfanboy_parse
CHANGED
@@ -1,12 +1,6 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# -*-ruby-*-
|
3
3
|
|
4
|
-
# Take 1 command line parameter: a full path to a plugin.
|
5
|
-
#
|
6
|
-
# Read stdin for a HTML, parse it and print the result to stdout in JSON
|
7
|
-
# format. If '-vv' command line parameters were given, output will be in
|
8
|
-
# 'key: value' pairs and <em>not</em> in JSON.
|
9
|
-
|
10
4
|
require_relative '../lib/bwkfanboy/parser'
|
11
5
|
|
12
6
|
$conf = {
|
@@ -19,12 +13,12 @@ if ARGV.size == 0 then
|
|
19
13
|
abort($conf[:banner])
|
20
14
|
else
|
21
15
|
Bwkfanboy::Utils.plugin_load(ARGV[0], Bwkfanboy::Meta::PLUGIN_CLASS)
|
22
|
-
end
|
16
|
+
end
|
23
17
|
|
24
18
|
opt = Bwkfanboy::Utils.plugin_opts(ARGV)
|
25
19
|
pn = Page.new(opt)
|
26
20
|
pn.check()
|
27
|
-
pn.parse()
|
21
|
+
pn.parse(STDIN)
|
28
22
|
|
29
23
|
if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
|
30
24
|
pn.dump()
|
data/bin/bwkfanboy_server
CHANGED
@@ -1,42 +1,9 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# -*-ruby-*-
|
3
3
|
|
4
|
-
# Start a HTTP server (by default on 127.0.0.1:9042). To get Atom feeds
|
5
|
-
# from it, initiate GET request with URI
|
6
|
-
#
|
7
|
-
# http://localhost:9042/?p=PLUGIN
|
8
|
-
#
|
9
|
-
# where +PLUGIN+ is a name of a bwkfanboy's plugin (without '.re' suffix).
|
10
|
-
# If the plugin requires additional options you can specify them like:
|
11
|
-
#
|
12
|
-
# http://localhost:9042/?p=PLUGIN&o=opt1%20%22opt2%20has%20spaces%22
|
13
|
-
#
|
14
|
-
# where <tt>opt1%20%22opt2%20has%20spaces%22</tt> is a encoded string
|
15
|
-
# <tt>opt1 "opt2 has spaces"</tt>.
|
16
|
-
#
|
17
|
-
# To list all available plugins, point you browser to
|
18
|
-
#
|
19
|
-
# http://localhost:9042/list
|
20
|
-
#
|
21
|
-
# The server is intended to run from a non-root user from
|
22
|
-
# <tt>~/.login</tt> file. It can detach from a terminal if you give it
|
23
|
-
# '-d' command line option.
|
24
|
-
#
|
25
|
-
# For other help, type:
|
26
|
-
#
|
27
|
-
# bwkfanboy_server -h
|
28
|
-
#
|
29
|
-
# The server maintains 2 logs:
|
30
|
-
#
|
31
|
-
# /tmp/bwkfanboy/USER/log/bwkfanboy_server.log
|
32
|
-
# /tmp/bwkfanboy/USER/log/bwkfanboy_server-access.log
|
33
|
-
#
|
34
|
-
# The file with a pid:
|
35
|
-
#
|
36
|
-
# /tmp/bwkfanboy/USER/bwkfanboy_server.pid
|
37
|
-
|
38
4
|
require 'shellwords'
|
39
5
|
require 'webrick'
|
6
|
+
require 'date'
|
40
7
|
require_relative '../lib/bwkfanboy/utils'
|
41
8
|
|
42
9
|
$conf = {
|
@@ -78,8 +45,14 @@ class FeedServlet < WEBrick::HTTPServlet::AbstractServlet # :nodoc: all
|
|
78
45
|
if r[0] != 0 then
|
79
46
|
raise WEBrick::HTTPStatus::InternalServerError.new("Errors in the pipeline:\n\n #{r[1]}")
|
80
47
|
end
|
81
|
-
|
48
|
+
|
82
49
|
res.body = r[2]
|
50
|
+
|
51
|
+
# search for <updated> tag and set Last-Modified header
|
52
|
+
if (m = r[2].match('<updated>(.+?)</updated>'))
|
53
|
+
res['Last-Modified'] = DateTime.parse(m.to_s).httpdate
|
54
|
+
end
|
55
|
+
|
83
56
|
else
|
84
57
|
raise WEBrick::HTTPStatus::InternalServerError.new("Parameter 'p' required")
|
85
58
|
end
|
data/doc/NEWS.rdoc
CHANGED
@@ -1,6 +1,20 @@
|
|
1
|
-
===
|
1
|
+
=== 1.1.4
|
2
2
|
|
3
|
-
-
|
3
|
+
- INCOMPATIBILITY: from now on, all plugins must NOT read the stdin
|
4
|
+
but the stream provided by bwkfanboy. See doc/plugin.rdoc.
|
5
|
+
|
6
|
+
- Moved the code from bwkfanboy_* to libraries.
|
7
|
+
|
8
|
+
- From now on, the bwkfanboy util by default does not run utils in a pipe but
|
9
|
+
uses libraries directly. One can restore the old functionality with
|
10
|
+
'-O' CL switch.
|
11
|
+
|
12
|
+
=== 0.1.4
|
13
|
+
|
14
|
+
- The minimum required Ruby version is 1.9.2.
|
15
|
+
- bwkfanboy_server now inserts a Last-Modified header.
|
16
|
+
- Fixed (?) a bug in bwkfanboy_generate with external encoding.
|
17
|
+
- Updated tests for Ruby 1.9.2.
|
4
18
|
|
5
19
|
=== 0.1.3
|
6
20
|
|
data/doc/README.rdoc
CHANGED
@@ -11,6 +11,10 @@ general assistance.
|
|
11
11
|
than the whole gem on rubygems.org, so grab the source before
|
12
12
|
struggling).
|
13
13
|
|
14
|
+
Plugins from version 1.1.4 are *incompatible* with the 0.1.x
|
15
|
+
series. Please reread in doc/plugin.rdoc the example of the skeleton
|
16
|
+
plugin.
|
17
|
+
|
14
18
|
= Architecture
|
15
19
|
|
16
20
|
== Plugins
|
@@ -18,10 +22,10 @@ struggling).
|
|
18
22
|
bwkfanboy comes with several plugins. One of them, for example, parses a
|
19
23
|
search page of dailyprincetonian.com looking for bwk's articles.
|
20
24
|
|
21
|
-
The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse
|
22
|
-
|
25
|
+
The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse parent,
|
26
|
+
overriding 1 method.
|
23
27
|
|
24
|
-
|
28
|
+
Plugins can be in system
|
25
29
|
|
26
30
|
`gem env gemdir`/gems/bwkfanboy-x.y.z/lib/bwkfanboy/plugins
|
27
31
|
|
@@ -89,7 +93,7 @@ There are 2 method to get an Atom feed via HTTP:
|
|
89
93
|
|
90
94
|
2. Small *bwkfanboy_server* HTTP server. It can run from any user and
|
91
95
|
thus is able to inherit env variables for discovering your HOME
|
92
|
-
directory. Read
|
96
|
+
directory. Read doc/bwkfanboy_server.rdoc to know how to operate it.
|
93
97
|
|
94
98
|
= License
|
95
99
|
:include: doc/LICENSE
|
@@ -0,0 +1,7 @@
|
|
1
|
+
= bwkfanboy_parse
|
2
|
+
|
3
|
+
Takes 1 command line parameter: a full path to a plugin.
|
4
|
+
|
5
|
+
Reads stdin for a HTML, parses it and prints the result to stdout in
|
6
|
+
JSON format. If '-vv' command line parameters were given, output will
|
7
|
+
be in 'key: value' pairs and <em>not</em> in JSON.
|
@@ -0,0 +1,35 @@
|
|
1
|
+
= bwkfanboy_server
|
2
|
+
|
3
|
+
Start a HTTP server (by default on 127.0.0.1:9042). To get Atom feeds
|
4
|
+
from it, initiate GET request with URI
|
5
|
+
|
6
|
+
http://localhost:9042/?p=PLUGIN
|
7
|
+
|
8
|
+
where +PLUGIN+ is a name of a bwkfanboy's plugin (without '.rb' suffix).
|
9
|
+
If the plugin requires additional options you can specify them like:
|
10
|
+
|
11
|
+
http://localhost:9042/?p=PLUGIN&o=opt1%20%22opt2%20has%20spaces%22
|
12
|
+
|
13
|
+
where <tt>opt1%20%22opt2%20has%20spaces%22</tt> is an encoded string
|
14
|
+
<tt>opt1 "opt2 has spaces"</tt>.
|
15
|
+
|
16
|
+
To list all available plugins, point your browser to
|
17
|
+
|
18
|
+
http://localhost:9042/list
|
19
|
+
|
20
|
+
The server is intended to run from a non-root user from
|
21
|
+
<tt>~/.login</tt> file. It can detach from a terminal if you give it
|
22
|
+
'-d' command line option.
|
23
|
+
|
24
|
+
For other help, type:
|
25
|
+
|
26
|
+
bwkfanboy_server -h
|
27
|
+
|
28
|
+
The server maintains 2 logs:
|
29
|
+
|
30
|
+
/tmp/bwkfanboy/USER/log/bwkfanboy_server.log
|
31
|
+
/tmp/bwkfanboy/USER/log/bwkfanboy_server-access.log
|
32
|
+
|
33
|
+
The file with a pid:
|
34
|
+
|
35
|
+
/tmp/bwkfanboy/USER/bwkfanboy_server.pid
|
data/doc/plugin.rdoc
CHANGED
@@ -1,12 +1,11 @@
|
|
1
|
-
=
|
1
|
+
= How to Write a \Plugin
|
2
2
|
|
3
3
|
First of all, look at examples provided with bwkfanboy. They were
|
4
4
|
intended to be 100% working because I was writing them for myself.
|
5
5
|
|
6
|
-
Basically, all you need is to write a class named _Page_ that
|
7
|
-
|
8
|
-
|
9
|
-
class.
|
6
|
+
Basically, all you need is to write a class named _Page_ that inherits
|
7
|
+
class Bwkfanboy::Parse, override in the child \#myparse method and write
|
8
|
+
a simple module named _Meta_ inside your _Page_ class.
|
10
9
|
|
11
10
|
== Skeleton
|
12
11
|
|
@@ -24,9 +23,9 @@ Here is a skeleton of a plugin:
|
|
24
23
|
CONTENT_TYPE = 'html'
|
25
24
|
end
|
26
25
|
|
27
|
-
def myparse()
|
28
|
-
# read
|
29
|
-
doc = Nokogiri::HTML(
|
26
|
+
def myparse(stream)
|
27
|
+
# read 'stream' IO object and parse it
|
28
|
+
doc = Nokogiri::HTML(stream, nil, Meta::ENC)
|
30
29
|
doc.xpath("XPATH QUERY").each {|i|
|
31
30
|
t = clean(i.xpath("XPATH QUERY").text())
|
32
31
|
l = clean(i.xpath("XPATH QUERY").text())
|
@@ -52,8 +51,8 @@ it should.
|
|
52
51
|
|
53
52
|
=== \Meta
|
54
53
|
|
55
|
-
Module _Meta_ can have
|
56
|
-
|
54
|
+
Module _Meta_ can only have constants--and *all* constants listed in the
|
55
|
+
skeleton are mandatory.
|
57
56
|
|
58
57
|
* <tt>URI</tt>--can be a <tt>http(s)://</tt> or <tt>ftp://</tt> URL
|
59
58
|
or just a path to a file on your local machine, as
|
@@ -75,13 +74,17 @@ the skeleton are required.
|
|
75
74
|
|
76
75
|
=== myparse
|
77
76
|
|
78
|
-
In
|
79
|
-
HTML you want to parse. The general idea:
|
77
|
+
In \#myparse method read 'stream' IO object. The contents of it is the
|
78
|
+
raw HTML you want to parse. The general idea:
|
80
79
|
|
81
|
-
* Atom feed must contain at least 1 entry, so look in HTML for some
|
82
|
-
crap which you break into 5 peaces:
|
83
|
-
|
84
|
-
|
80
|
+
* Atom feed must contain at least 1 entry, so look in the HTML for some
|
81
|
+
crap which you must break into 5 pieces:
|
82
|
+
|
83
|
+
- a title of the entry
|
84
|
+
- a link for it
|
85
|
+
- a date for the entry
|
86
|
+
- who is the author of the entry and
|
87
|
+
- its contents.
|
85
88
|
|
86
89
|
* After you scan and grab 1 entry, create a hash and add it to
|
87
90
|
_self_ as it was in the skeleton:
|
@@ -89,20 +92,20 @@ HTML you want to parse. The general idea:
|
|
89
92
|
self << { title: t, link: l, updated: u, author: a, content: c }
|
90
93
|
|
91
94
|
Here variables _t_, _l_, _u_, _a_ and _c_ contain the actual
|
92
|
-
|
95
|
+
value of 5 pieces for the entry. Names of the keys in the hash are
|
93
96
|
important of course--don't invent your own.
|
94
97
|
|
95
|
-
*
|
98
|
+
* Probably there would be more crap in the HTML that you can use to
|
96
99
|
construct another entry. Keep parsing and adding entries.
|
97
100
|
|
98
|
-
* While you scanning, use the 2 helper methods for cleaning each
|
99
|
-
peace: \#clean, which removed duplicate spaces and
|
100
|
-
|
101
|
-
|
101
|
+
* While you're scanning, use the 2 helper methods for cleaning each
|
102
|
+
piece: \#clean, which removes duplicate spaces and \#date, which parses
|
103
|
+
a string and returns a date in ISO8601 format. You may override \#date
|
104
|
+
method if you like.
|
102
105
|
|
103
106
|
=== Options
|
104
107
|
|
105
|
-
Plugins can have _options_ and a user should provide
|
108
|
+
Plugins can have _options_ and a user should provide them to the plugin
|
106
109
|
in the real-time. For example, say you're scraping a site where many
|
107
110
|
users are wasting their time. If you want to watch for several of them
|
108
111
|
it is silly to write a new plugin every time for a new
|
@@ -130,20 +133,20 @@ _option_ becomes mandatory for the end-user.
|
|
130
133
|
To test how nice your plugin works, save the html page to the file
|
131
134
|
and type:
|
132
135
|
|
133
|
-
% bwkfanboy_parse -vv path/to/
|
136
|
+
% bwkfanboy_parse -vv /path/to/the/plugin.rb < saved_page.html
|
134
137
|
|
135
138
|
to see the result as in plain text, or
|
136
139
|
|
137
|
-
% bwkfanboy_parse -v path/to/
|
140
|
+
% bwkfanboy_parse -v /path/to/the/plugin.rb < saved_page.html
|
138
141
|
|
139
|
-
as pretty JSON.
|
142
|
+
as a pretty JSON.
|
140
143
|
|
141
144
|
For option-enabled plugins, supply additional parameters for them after
|
142
145
|
the plugin path:
|
143
146
|
|
144
|
-
% bwkfanboy_parse -vv path/to/
|
145
|
-
option_1 "
|
147
|
+
% bwkfanboy_parse -vv /path/to/the/plugin.rb \
|
148
|
+
option_1 "option 2" < saved_page.html
|
146
149
|
|
147
150
|
<tt>bwkfanboy_parse</tt> returns 0 if no errors occurred or >= 1 if you
|
148
151
|
have errors in your plugin code. N.B.: the output from
|
149
|
-
<tt>bwkparser_parse</tt>
|
152
|
+
<tt>bwkfanboy_parse</tt> *must* always be in UTF-8.
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
|
3
|
+
require_relative 'utils'
|
4
|
+
|
5
|
+
module Bwkfanboy
|
6
|
+
class Fetch
|
7
|
+
|
8
|
+
# If no block given, return contents of fetch'ed URI. Otherwise,
|
9
|
+
# execute the block with 1 parameter--stream.
|
10
|
+
def self.cat(uri)
|
11
|
+
uri.chomp!
|
12
|
+
|
13
|
+
Bwkfanboy::Utils.veputs(1, "fetching #{uri}\n")
|
14
|
+
|
15
|
+
begin
|
16
|
+
open(uri, "User-Agent" => Bwkfanboy::Meta::USER_AGENT) {|f|
|
17
|
+
if defined?(f.meta) && f.status[0] != '200' then
|
18
|
+
Bwkfanboy::Utils.errx(1, "cannot fetch #{uri} : HTTP responce: #{f.status[0]}")
|
19
|
+
end
|
20
|
+
Bwkfanboy::Utils.veputs(1, "charset = #{f.content_type_parse[1][1]}\n") if defined?(f.meta)
|
21
|
+
if block_given?
|
22
|
+
yield f
|
23
|
+
else
|
24
|
+
return f.read
|
25
|
+
end
|
26
|
+
}
|
27
|
+
rescue
|
28
|
+
# typically Errno::ENOENT
|
29
|
+
Bwkfanboy::Utils.errx(1, "cannot fetch: #{$!}");
|
30
|
+
end
|
31
|
+
|
32
|
+
return ""
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'rss/maker'
|
2
|
+
require 'date'
|
3
|
+
require 'json'
|
4
|
+
require 'jsonschema'
|
5
|
+
|
6
|
+
require_relative 'utils'
|
7
|
+
|
8
|
+
module Bwkfanboy
|
9
|
+
class Generate
|
10
|
+
|
11
|
+
def self.validate(t)
|
12
|
+
schema = Bwkfanboy::Utils.gem_dir_system() + '/schema.js'
|
13
|
+
begin
|
14
|
+
JSON::Schema.validate(t, JSON.parse(File.read(schema)))
|
15
|
+
rescue
|
16
|
+
Bwkfanboy::Utils.errx(1, "JSON validation with schema (#{schema}) failed");
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.atom(src)
|
21
|
+
feed = RSS::Maker.make("atom") { |maker|
|
22
|
+
maker.channel.id = src['channel']['id']
|
23
|
+
maker.channel.updated = src['channel']['updated']
|
24
|
+
maker.channel.author = src['channel']['author']
|
25
|
+
maker.channel.title = src['channel']['title']
|
26
|
+
|
27
|
+
maker.channel.links.new_link {|i|
|
28
|
+
i.href = src['channel']['link']
|
29
|
+
i.rel = 'alternate'
|
30
|
+
i.type = 'text/html' # eh
|
31
|
+
}
|
32
|
+
|
33
|
+
maker.items.do_sort = true
|
34
|
+
|
35
|
+
src['x_entries'].each { |i|
|
36
|
+
maker.items.new_item do |item|
|
37
|
+
item.links.new_link {|k|
|
38
|
+
k.href = i['link']
|
39
|
+
k.rel = 'alternate'
|
40
|
+
k.type = 'text/html' # only to make happy crappy pr2nntp gateway
|
41
|
+
}
|
42
|
+
item.title = i['title']
|
43
|
+
item.author = i['author']
|
44
|
+
item.updated = i['updated']
|
45
|
+
item.content.type = src['channel']['x_entries_content_type']
|
46
|
+
|
47
|
+
case item.content.type
|
48
|
+
when 'text'
|
49
|
+
item.content.content = i['content']
|
50
|
+
when 'html'
|
51
|
+
item.content.content = i['content']
|
52
|
+
else
|
53
|
+
item.content.xhtml = i['content']
|
54
|
+
end
|
55
|
+
end
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
return feed
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
data/lib/bwkfanboy/parser.rb
CHANGED
@@ -8,7 +8,7 @@ module Bwkfanboy
|
|
8
8
|
|
9
9
|
# :include: ../../doc/plugin.rdoc
|
10
10
|
class Parse
|
11
|
-
ENTRIES_MAX =
|
11
|
+
ENTRIES_MAX = 128
|
12
12
|
|
13
13
|
attr_reader :opt
|
14
14
|
|
@@ -18,10 +18,10 @@ module Bwkfanboy
|
|
18
18
|
end
|
19
19
|
|
20
20
|
# Invokes #myparse & checks if it has grabbed something.
|
21
|
-
def parse()
|
21
|
+
def parse(stream)
|
22
22
|
@entries = []
|
23
23
|
begin
|
24
|
-
myparse()
|
24
|
+
myparse(stream)
|
25
25
|
rescue
|
26
26
|
@entries = []
|
27
27
|
Utils.errx(1, "parsing failed: #{$!}\n\nBacktrace:\n\n#{$!.backtrace.join("\n")}")
|
@@ -99,7 +99,7 @@ module Bwkfanboy
|
|
99
99
|
protected
|
100
100
|
|
101
101
|
# This *must* be overridden in the child.
|
102
|
-
def myparse()
|
102
|
+
def myparse(stream)
|
103
103
|
raise "plugin isn't finished yet"
|
104
104
|
end
|
105
105
|
|
@@ -8,16 +8,16 @@ class Page < Bwkfanboy::Parse
|
|
8
8
|
URI = 'http://www.dailyprincetonian.com/advanced_search/?author=Brian+Kernighan'
|
9
9
|
URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/bwk.html'
|
10
10
|
ENC = 'UTF-8'
|
11
|
-
VERSION =
|
11
|
+
VERSION = 2
|
12
12
|
COPYRIGHT = "See bwkfanboy's LICENSE file"
|
13
13
|
TITLE = "Brian Kernighan's articles from Daily Princetonian"
|
14
14
|
CONTENT_TYPE = 'html'
|
15
15
|
end
|
16
16
|
|
17
|
-
def myparse()
|
17
|
+
def myparse(stream)
|
18
18
|
url = "http://www.dailyprincetonian.com"
|
19
19
|
|
20
|
-
doc = Nokogiri::HTML(
|
20
|
+
doc = Nokogiri::HTML(stream, nil, Meta::ENC)
|
21
21
|
doc.xpath("//div[@class='article_item']").each {|i|
|
22
22
|
t = clean(i.xpath("h2/a").children.text())
|
23
23
|
fail 'unable to extract link' if (link = clean(i.xpath("h2/a")[0].attributes['href'].value()).empty?)
|
@@ -5,7 +5,7 @@ class Page < Bwkfanboy::Parse
|
|
5
5
|
URI = '/usr/ports/UPDATING'
|
6
6
|
URI_DEBUG = URI
|
7
7
|
ENC = 'ASCII'
|
8
|
-
VERSION =
|
8
|
+
VERSION = 2
|
9
9
|
COPYRIGHT = "See bwkfanboy's LICENSE file"
|
10
10
|
TITLE = "News from FreeBSD ports"
|
11
11
|
CONTENT_TYPE = 'text'
|
@@ -24,7 +24,7 @@ class Page < Bwkfanboy::Parse
|
|
24
24
|
return t
|
25
25
|
end
|
26
26
|
|
27
|
-
def myparse()
|
27
|
+
def myparse(stream)
|
28
28
|
re_u = /^(\d{8}):$/
|
29
29
|
re_t1 = /^ {2}AFFECTS:\s+(.+)$/
|
30
30
|
re_t2 = /^\s+(.+)$/
|
@@ -33,7 +33,7 @@ class Page < Bwkfanboy::Parse
|
|
33
33
|
ready = false
|
34
34
|
mode = nil
|
35
35
|
t = l = u = a = c = nil
|
36
|
-
while line =
|
36
|
+
while line = stream.gets
|
37
37
|
line.rstrip!
|
38
38
|
|
39
39
|
if line =~ re_u then
|
@@ -17,17 +17,17 @@ class Page < Bwkfanboy::Parse
|
|
17
17
|
URI = 'http://www.quora.com/#{opt[0]}/answers'
|
18
18
|
URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/quora.html'
|
19
19
|
ENC = 'UTF-8'
|
20
|
-
VERSION =
|
20
|
+
VERSION = 5
|
21
21
|
COPYRIGHT = "See bwkfanboy's LICENSE file"
|
22
22
|
TITLE = "Last n answers (per-user) from Quora; requires nodejs"
|
23
23
|
CONTENT_TYPE = 'html'
|
24
24
|
end
|
25
25
|
|
26
|
-
def myparse()
|
26
|
+
def myparse(stream)
|
27
27
|
profile = opt[0] # for example, 'Brandon-Smietana'
|
28
28
|
|
29
29
|
# read the stream
|
30
|
-
doc = Nokogiri::HTML(
|
30
|
+
doc = Nokogiri::HTML(stream, nil, Meta::ENC)
|
31
31
|
|
32
32
|
# extract & evaluate JavaScript into tstp
|
33
33
|
tstp = nil
|
data/lib/bwkfanboy/utils.rb
CHANGED
@@ -7,7 +7,7 @@ require 'active_support/core_ext/module/attribute_accessors'
|
|
7
7
|
module Bwkfanboy
|
8
8
|
module Meta
|
9
9
|
NAME = 'bwkfanboy'
|
10
|
-
VERSION = '
|
10
|
+
VERSION = '1.1.4'
|
11
11
|
USER_AGENT = "#{NAME}/#{VERSION} (#{RUBY_PLATFORM}; N; #{Encoding.default_external.name}; #{RUBY_ENGINE}; rv:#{RUBY_VERSION}.#{RUBY_PATCHLEVEL})"
|
12
12
|
PLUGIN_CLASS = 'Page'
|
13
13
|
DIR_TMP = "/tmp/#{Meta::NAME}/#{ENV['USER']}"
|
@@ -89,7 +89,7 @@ module Bwkfanboy
|
|
89
89
|
# TODO get rid of eval()
|
90
90
|
fail "class #{class_name} isn't defined" if (! eval("defined?#{class_name}") || ! eval(class_name).is_a?(Class) )
|
91
91
|
rescue LoadError
|
92
|
-
errx(1, "cannot load plugin '#{path}'");
|
92
|
+
errx(1, "cannot load plugin '#{path}' #{$!}");
|
93
93
|
rescue Exception
|
94
94
|
errx(1, "plugin '#{path}' has errors: #{$!}\n\nBacktrace:\n\n#{$!.backtrace.join("\n")}")
|
95
95
|
end
|
data/test/test_fetch.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
|
-
#!/usr/bin/env ruby19
|
2
|
-
|
3
|
-
require 'minitest/autorun'
|
4
1
|
require 'digest/md5'
|
5
2
|
|
6
3
|
require_relative '../lib/bwkfanboy/utils'
|
7
|
-
require_relative 'ts_utils
|
4
|
+
require_relative 'ts_utils'
|
8
5
|
|
9
6
|
# TODO add HTTP 404 check; drop connection from server during HTTP 200
|
10
7
|
# replay...
|
data/test/test_generate.rb
CHANGED
data/test/test_parse.rb
CHANGED
@@ -1,6 +1,3 @@
|
|
1
|
-
#!/usr/bin/env ruby19
|
2
|
-
|
3
|
-
require 'minitest/autorun'
|
4
1
|
require 'digest/md5'
|
5
2
|
|
6
3
|
require_relative '../lib/bwkfanboy/utils'
|
@@ -17,16 +14,16 @@ class TestParse < MiniTest::Unit::TestCase
|
|
17
14
|
|
18
15
|
def test_empty_plugin
|
19
16
|
cmd CMD
|
20
|
-
r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{@tpath}plugins/empty.rb ")
|
17
|
+
r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{Dir.pwd}/#{@tpath}plugins/empty.rb ")
|
21
18
|
assert_equal(1, r[0])
|
22
19
|
assert_match(/plugin .+ has errors: class Page isn't defined/, r[1])
|
23
20
|
end
|
24
21
|
|
25
22
|
def test_plugin_parse
|
26
23
|
cmd CMD
|
27
|
-
r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{@tpath}plugins/bwk.rb < #{@tpath}semis/bwk.html")
|
24
|
+
r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{Dir.pwd}/#{@tpath}../lib/bwkfanboy/plugins/bwk.rb < #{@tpath}semis/bwk.html")
|
28
25
|
assert_equal(0, r[0])
|
29
|
-
# bin/bwkfanboy_parse
|
30
|
-
assert_equal('
|
26
|
+
# bin/bwkfanboy_parse `pwd`/lib/bwkfanboy/plugins/bwk.rb < test/semis/bwk.html | md5
|
27
|
+
assert_equal('a433a4a27bafb060a41aa85a40808056', Digest::MD5.hexdigest(r[2]))
|
31
28
|
end
|
32
29
|
end
|
data/test/test_server.rb
CHANGED
data/test/ts_utils.rb
CHANGED
metadata
CHANGED
@@ -3,10 +3,10 @@ name: bwkfanboy
|
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
|
-
- 0
|
7
6
|
- 1
|
8
|
-
-
|
9
|
-
|
7
|
+
- 1
|
8
|
+
- 4
|
9
|
+
version: 1.1.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Alexander Gromnitsky
|
@@ -14,27 +14,29 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-11-08 00:00:00 +02:00
|
18
18
|
default_executable: bwkfanboy
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: activesupport
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
24
25
|
requirements:
|
25
26
|
- - ">="
|
26
27
|
- !ruby/object:Gem::Version
|
27
28
|
segments:
|
28
29
|
- 3
|
29
30
|
- 0
|
30
|
-
-
|
31
|
-
version: 3.0.
|
31
|
+
- 1
|
32
|
+
version: 3.0.1
|
32
33
|
type: :runtime
|
33
34
|
version_requirements: *id001
|
34
35
|
- !ruby/object:Gem::Dependency
|
35
36
|
name: nokogiri
|
36
37
|
prerelease: false
|
37
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
38
40
|
requirements:
|
39
41
|
- - ">="
|
40
42
|
- !ruby/object:Gem::Version
|
@@ -49,6 +51,7 @@ dependencies:
|
|
49
51
|
name: open4
|
50
52
|
prerelease: false
|
51
53
|
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
52
55
|
requirements:
|
53
56
|
- - ">="
|
54
57
|
- !ruby/object:Gem::Version
|
@@ -63,6 +66,7 @@ dependencies:
|
|
63
66
|
name: jsonschema
|
64
67
|
prerelease: false
|
65
68
|
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
66
70
|
requirements:
|
67
71
|
- - ">="
|
68
72
|
- !ruby/object:Gem::Version
|
@@ -84,15 +88,14 @@ executables:
|
|
84
88
|
extensions: []
|
85
89
|
|
86
90
|
extra_rdoc_files:
|
87
|
-
- bin/bwkfanboy_generate
|
88
|
-
- bin/bwkfanboy_parse
|
89
|
-
- bin/bwkfanboy
|
90
|
-
- bin/bwkfanboy_server
|
91
|
-
- bin/bwkfanboy_fetch
|
92
|
-
- doc/plugin.rdoc
|
93
|
-
- doc/README.rdoc
|
94
91
|
- doc/LICENSE
|
95
92
|
- doc/NEWS.rdoc
|
93
|
+
- doc/README.rdoc
|
94
|
+
- doc/plugin.rdoc
|
95
|
+
- doc/bwkfanboy_fetch.rdoc
|
96
|
+
- doc/bwkfanboy_generate.rdoc
|
97
|
+
- doc/bwkfanboy_parse.rdoc
|
98
|
+
- doc/bwkfanboy_server.rdoc
|
96
99
|
files:
|
97
100
|
- lib/bwkfanboy/plugins/bwk.rb
|
98
101
|
- lib/bwkfanboy/plugins/freebsd-ports-update.rb
|
@@ -101,20 +104,25 @@ files:
|
|
101
104
|
- lib/bwkfanboy/parser.rb
|
102
105
|
- lib/bwkfanboy/utils.rb
|
103
106
|
- lib/bwkfanboy/schema.js
|
107
|
+
- lib/bwkfanboy/fetch.rb
|
108
|
+
- lib/bwkfanboy/generate.rb
|
104
109
|
- bin/bwkfanboy_generate
|
105
110
|
- bin/bwkfanboy_parse
|
106
111
|
- bin/bwkfanboy
|
107
112
|
- bin/bwkfanboy_server
|
108
113
|
- bin/bwkfanboy_fetch
|
109
|
-
- doc/plugin.rdoc
|
110
|
-
- doc/README.rdoc
|
111
114
|
- doc/LICENSE
|
112
115
|
- doc/NEWS.rdoc
|
116
|
+
- doc/README.rdoc
|
117
|
+
- doc/plugin.rdoc
|
118
|
+
- doc/bwkfanboy_fetch.rdoc
|
119
|
+
- doc/bwkfanboy_generate.rdoc
|
120
|
+
- doc/bwkfanboy_parse.rdoc
|
121
|
+
- doc/bwkfanboy_server.rdoc
|
113
122
|
- README.rdoc
|
114
123
|
- Rakefile
|
115
124
|
- TODO
|
116
125
|
- test/plugins/empty.rb
|
117
|
-
- test/plugins/bwk.rb
|
118
126
|
- test/semis/bwk.html
|
119
127
|
- test/semis/bwk.json
|
120
128
|
- test/semis/quora.html
|
@@ -134,20 +142,23 @@ licenses: []
|
|
134
142
|
post_install_message:
|
135
143
|
rdoc_options:
|
136
144
|
- -m
|
137
|
-
-
|
145
|
+
- doc/README.rdoc
|
138
146
|
- -x
|
139
147
|
- plugins
|
140
148
|
require_paths:
|
141
149
|
- lib
|
142
150
|
required_ruby_version: !ruby/object:Gem::Requirement
|
151
|
+
none: false
|
143
152
|
requirements:
|
144
153
|
- - ">="
|
145
154
|
- !ruby/object:Gem::Version
|
146
155
|
segments:
|
147
156
|
- 1
|
148
157
|
- 9
|
149
|
-
|
158
|
+
- 2
|
159
|
+
version: 1.9.2
|
150
160
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
161
|
+
none: false
|
151
162
|
requirements:
|
152
163
|
- - ">="
|
153
164
|
- !ruby/object:Gem::Version
|
@@ -157,7 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
157
168
|
requirements: []
|
158
169
|
|
159
170
|
rubyforge_project:
|
160
|
-
rubygems_version: 1.3.
|
171
|
+
rubygems_version: 1.3.7
|
161
172
|
signing_key:
|
162
173
|
specification_version: 3
|
163
174
|
summary: A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.
|
data/test/plugins/bwk.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
require 'nokogiri'
|
2
|
-
|
3
|
-
class Page < Bwkfanboy::Parse
|
4
|
-
module Meta
|
5
|
-
URI = "html/bwk.html"
|
6
|
-
ENC = 'UTF-8'
|
7
|
-
VERSION = 1
|
8
|
-
COPYRIGHT = '(c) 2010 Alexander Gromnitsky'
|
9
|
-
TITLE = "Brian Kernighan's articles from Daily Princetonian"
|
10
|
-
CONTENT_TYPE = 'html'
|
11
|
-
end
|
12
|
-
|
13
|
-
def myparse()
|
14
|
-
url = "http://www.dailyprincetonian.com"
|
15
|
-
|
16
|
-
doc = Nokogiri::HTML(STDIN, nil, Meta::ENC)
|
17
|
-
doc.xpath("//div[@class='article_item']").each {|i|
|
18
|
-
t = clean(i.xpath("h2/a").children.text())
|
19
|
-
fail 'unable to extract link' if (link = clean(i.xpath("h2/a")[0].attributes['href'].value()).empty?)
|
20
|
-
link = clean(i.xpath("h2/a")[0].attributes['href'].value())
|
21
|
-
l = url + link + "print"
|
22
|
-
u = date(i.xpath("h2").children[1].text())
|
23
|
-
a = clean(i.xpath("div/span/a[1]").children.text())
|
24
|
-
c = clean(i.xpath("div[@class='summary']").text())
|
25
|
-
|
26
|
-
self << { title: t, link: l, updated: u, author: a, content: c }
|
27
|
-
}
|
28
|
-
end
|
29
|
-
end
|