bwkfanboy 0.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +8 -4
- data/Rakefile +9 -10
- data/bin/bwkfanboy +30 -8
- data/bin/bwkfanboy_fetch +6 -23
- data/bin/bwkfanboy_generate +7 -62
- data/bin/bwkfanboy_parse +3 -9
- data/bin/bwkfanboy_server +9 -36
- data/doc/NEWS.rdoc +16 -2
- data/doc/README.rdoc +8 -4
- data/doc/bwkfanboy_fetch.rdoc +4 -0
- data/doc/bwkfanboy_generate.rdoc +7 -0
- data/doc/bwkfanboy_parse.rdoc +7 -0
- data/doc/bwkfanboy_server.rdoc +35 -0
- data/doc/plugin.rdoc +32 -29
- data/lib/bwkfanboy/fetch.rb +36 -0
- data/lib/bwkfanboy/generate.rb +63 -0
- data/lib/bwkfanboy/parser.rb +4 -4
- data/lib/bwkfanboy/plugins/bwk.rb +3 -3
- data/lib/bwkfanboy/plugins/freebsd-ports-update.rb +3 -3
- data/lib/bwkfanboy/plugins/quora.rb +3 -3
- data/lib/bwkfanboy/utils.rb +2 -2
- data/test/test_fetch.rb +1 -4
- data/test/test_generate.rb +0 -3
- data/test/test_parse.rb +4 -7
- data/test/test_server.rb +0 -3
- data/test/ts_utils.rb +8 -0
- metadata +30 -19
- data/test/plugins/bwk.rb +0 -29
data/README.rdoc
CHANGED
@@ -11,6 +11,10 @@ general assistance.
|
|
11
11
|
than the whole gem on rubygems.org, so grab the source before
|
12
12
|
struggling).
|
13
13
|
|
14
|
+
Plugins from version 1.1.4 are *incompatible* with the 0.1.x
|
15
|
+
series. Please reread in doc/plugin.rdoc the example of the skeleton
|
16
|
+
plugin.
|
17
|
+
|
14
18
|
= Architecture
|
15
19
|
|
16
20
|
== Plugins
|
@@ -18,10 +22,10 @@ struggling).
|
|
18
22
|
bwkfanboy comes with several plugins. One of them, for example, parses a
|
19
23
|
search page of dailyprincetonian.com looking for bwk's articles.
|
20
24
|
|
21
|
-
The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse
|
22
|
-
|
25
|
+
The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse parent,
|
26
|
+
overriding 1 method.
|
23
27
|
|
24
|
-
|
28
|
+
Plugins can be in system
|
25
29
|
|
26
30
|
`gem env gemdir`/gems/bwkfanboy-x.y.z/lib/bwkfanboy/plugins
|
27
31
|
|
@@ -89,7 +93,7 @@ There are 2 method to get an Atom feed via HTTP:
|
|
89
93
|
|
90
94
|
2. Small *bwkfanboy_server* HTTP server. It can run from any user and
|
91
95
|
thus is able to inherit env variables for discovering your HOME
|
92
|
-
directory. Read
|
96
|
+
directory. Read doc/bwkfanboy_server.rdoc to know how to operate it.
|
93
97
|
|
94
98
|
= License
|
95
99
|
:include: doc/LICENSE
|
data/Rakefile
CHANGED
@@ -9,12 +9,12 @@ require 'rake/testtask'
|
|
9
9
|
spec = Gem::Specification.new() {|i|
|
10
10
|
i.name = "bwkfanboy"
|
11
11
|
i.summary = 'A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.'
|
12
|
-
i.version =
|
12
|
+
i.version = `bin/#{i.name} -V`
|
13
13
|
i.author = 'Alexander Gromnitsky'
|
14
14
|
i.email = 'alexander.gromnitsky@gmail.com'
|
15
|
-
i.homepage =
|
15
|
+
i.homepage = "http://github.com/gromnitsky/#{i.name}"
|
16
16
|
i.platform = Gem::Platform::RUBY
|
17
|
-
i.required_ruby_version = '>= 1.9'
|
17
|
+
i.required_ruby_version = '>= 1.9.2'
|
18
18
|
i.files = FileList['lib/**/*', 'bin/*', 'doc/*', '[A-Z]*', 'test/**/*']
|
19
19
|
|
20
20
|
i.executables = FileList['bin/*'].gsub(/^bin\//, '')
|
@@ -22,10 +22,10 @@ spec = Gem::Specification.new() {|i|
|
|
22
22
|
|
23
23
|
i.test_files = FileList['test/test_*.rb']
|
24
24
|
|
25
|
-
i.rdoc_options << '-m' << '
|
26
|
-
i.extra_rdoc_files = FileList['
|
25
|
+
i.rdoc_options << '-m' << 'doc/README.rdoc' << '-x' << 'plugins'
|
26
|
+
i.extra_rdoc_files = FileList['doc/*']
|
27
27
|
|
28
|
-
i.add_dependency('activesupport', '>= 3.0.
|
28
|
+
i.add_dependency('activesupport', '>= 3.0.1')
|
29
29
|
i.add_dependency('nokogiri', '>= 1.4.3')
|
30
30
|
i.add_dependency('open4', '>= 1.0.1')
|
31
31
|
i.add_dependency('jsonschema', '>= 2.0.0')
|
@@ -36,12 +36,11 @@ Rake::GemPackageTask.new(spec).define()
|
|
36
36
|
task(default: %(repackage))
|
37
37
|
|
38
38
|
Rake::RDocTask.new('doc') {|i|
|
39
|
-
i.main =
|
40
|
-
i.rdoc_files = FileList['doc/*', 'lib/**/*.rb'
|
41
|
-
i.rdoc_files.exclude("lib/**/plugins"
|
39
|
+
i.main = 'doc/README.rdoc'
|
40
|
+
i.rdoc_files = FileList['doc/*', 'lib/**/*.rb']
|
41
|
+
i.rdoc_files.exclude("lib/**/plugins")
|
42
42
|
}
|
43
43
|
|
44
44
|
Rake::TestTask.new() {|i|
|
45
45
|
i.test_files = FileList['test/test_*.rb']
|
46
|
-
i.verbose = true
|
47
46
|
}
|
data/bin/bwkfanboy
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# -*-ruby-*-
|
3
3
|
|
4
4
|
# This program is executed by bin/bwkfanboy_server to do all dirty work:
|
@@ -11,13 +11,14 @@
|
|
11
11
|
#
|
12
12
|
# % bwkfanboy -h
|
13
13
|
#
|
14
|
-
# to get some basic help
|
14
|
+
# to get some basic help--read about Bwkfanboy module.
|
15
15
|
|
16
16
|
require 'shellwords'
|
17
17
|
require_relative '../lib/bwkfanboy/parser'
|
18
18
|
|
19
19
|
$conf = {
|
20
|
-
mode: '
|
20
|
+
mode: 'fast',
|
21
|
+
debug: false,
|
21
22
|
banner: "Usage: #{File.basename($0)} [options] plugin-name"
|
22
23
|
}
|
23
24
|
|
@@ -87,7 +88,8 @@ o = Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner]) # create OptionParser object
|
|
87
88
|
o.on('-i', 'Show some info about the plugin') { |i| $conf[:mode] = 'info' }
|
88
89
|
o.on('-l', 'List all plugins') { |i| $conf[:mode] = 'list' }
|
89
90
|
o.on('-p', 'List all plugins paths') { |i| $conf[:mode] = 'path' }
|
90
|
-
o.on('-
|
91
|
+
o.on('-O', '(ignore this) Execute all bwkfanboy_* utils in a pipe') { |i| $conf[:mode] = 'pipe' }
|
92
|
+
o.on('-D', '(ignore this) Use URI_DEBUG const instead URI in plugins') { |i| $conf[:debug] = true }
|
91
93
|
Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], o) # run cl parser
|
92
94
|
|
93
95
|
plugin = Plugin.new(ARGV[0])
|
@@ -107,12 +109,11 @@ when 'info'
|
|
107
109
|
plugin.load(opt).dump_info
|
108
110
|
when 'version'
|
109
111
|
puts Bwkfanboy::Meta::VERSION
|
110
|
-
|
111
|
-
# A pipe mode
|
112
|
+
when 'pipe'
|
112
113
|
pn = plugin.load(opt)
|
113
114
|
cmd = "./bwkfanboy_fetch | ./bwkfanboy_parse '#{plugin.path}' #{opt.size != 0 ? Shellwords.join(opt) : ''} | ./bwkfanboy_generate"
|
114
115
|
if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
|
115
|
-
puts (
|
116
|
+
puts (!$conf[:debug] ? pn.uri() : pn.class::Meta::URI_DEBUG)
|
116
117
|
puts cmd
|
117
118
|
exit 0
|
118
119
|
end
|
@@ -121,10 +122,31 @@ else
|
|
121
122
|
Dir.chdir(File.dirname(File.expand_path($0)))
|
122
123
|
|
123
124
|
pipe = IO.popen(cmd, 'w+')
|
124
|
-
pipe.puts (
|
125
|
+
pipe.puts (!$conf[:debug] ? pn.uri : pn.class::Meta::URI_DEBUG)
|
125
126
|
pipe.close_write
|
126
127
|
while line = pipe.gets
|
127
128
|
puts line
|
128
129
|
end
|
129
130
|
pipe.close
|
131
|
+
|
132
|
+
else
|
133
|
+
# a 'new', faster way
|
134
|
+
pn = plugin.load(opt)
|
135
|
+
|
136
|
+
require_relative '../lib/bwkfanboy/fetch'
|
137
|
+
require_relative '../lib/bwkfanboy/generate'
|
138
|
+
|
139
|
+
# 1. fetch & parse
|
140
|
+
pn = Page.new(opt)
|
141
|
+
Bwkfanboy::Fetch.cat(!$conf[:debug] ? pn.uri() : pn.class::Meta::URI_DEBUG) {|stream|
|
142
|
+
pn.parse(stream)
|
143
|
+
}
|
144
|
+
|
145
|
+
if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
|
146
|
+
pn.dump()
|
147
|
+
exit 0
|
148
|
+
end
|
149
|
+
|
150
|
+
# 2.generate
|
151
|
+
puts Bwkfanboy::Generate.atom(JSON.parse(pn.to_json))
|
130
152
|
end
|
data/bin/bwkfanboy_fetch
CHANGED
@@ -1,30 +1,13 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# -*-ruby-*-
|
3
3
|
|
4
|
-
|
5
|
-
# read for the local file) and print to stdout.
|
6
|
-
|
7
|
-
require 'open-uri'
|
8
|
-
|
9
|
-
require_relative '../lib/bwkfanboy/utils'
|
4
|
+
require_relative '../lib/bwkfanboy/fetch'
|
10
5
|
|
11
6
|
$conf = { banner: "Usage: #{File.basename($0)} [options] < uri" }
|
12
7
|
|
13
8
|
Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], nil, true)
|
14
9
|
|
15
|
-
uri = gets
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
begin
|
20
|
-
open(uri, "User-Agent" => Bwkfanboy::Meta::USER_AGENT) {|f|
|
21
|
-
if defined?(f.meta) && f.status[0] != '200' then
|
22
|
-
Bwkfanboy::Utils.errx(1, "cannot fetch #{uri} : HTTP responce: #{f.status[0]}")
|
23
|
-
end
|
24
|
-
Bwkfanboy::Utils.veputs(1, "charset = #{f.content_type_parse[1][1]}\n") if defined?(f.meta)
|
25
|
-
f.each_line {|i| puts i}
|
26
|
-
}
|
27
|
-
rescue
|
28
|
-
# typically Errno::ENOENT
|
29
|
-
Bwkfanboy::Utils.errx(1, "cannot fetch: #{$!}");
|
30
|
-
end
|
10
|
+
uri = gets
|
11
|
+
Bwkfanboy::Fetch.cat(uri) {|f|
|
12
|
+
f.each_line {|line| puts line }
|
13
|
+
}
|
data/bin/bwkfanboy_generate
CHANGED
@@ -1,24 +1,16 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# -*-ruby-*-
|
3
3
|
|
4
|
-
|
5
|
-
# result to stdout in UTF-8.
|
6
|
-
#
|
7
|
-
# One can validate the JSON by providing '--check' command line option
|
8
|
-
# (by default the validating is off).
|
9
|
-
|
10
|
-
require 'rss/maker'
|
11
|
-
require 'date'
|
12
|
-
require 'json'
|
13
|
-
require 'jsonschema'
|
14
|
-
|
15
|
-
require_relative '../lib/bwkfanboy/utils'
|
4
|
+
require_relative '../lib/bwkfanboy/generate'
|
16
5
|
|
17
6
|
$conf = {
|
18
7
|
banner: "Usage: #{File.basename($0)} [options] < json",
|
19
8
|
check: false
|
20
9
|
}
|
21
10
|
|
11
|
+
# we are expecting the input ONLY in UTF-8
|
12
|
+
Encoding.default_external = 'UTF-8'
|
13
|
+
|
22
14
|
o = Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner])
|
23
15
|
o.on('--check', 'Validate the input (slow!)') { |i| $conf[:check] = true }
|
24
16
|
Bwkfanboy::Utils.cl_parse(ARGV, $conf[:banner], o) # run cl parser
|
@@ -29,52 +21,5 @@ rescue
|
|
29
21
|
Bwkfanboy::Utils.errx(1, "stdin had invalid JSON");
|
30
22
|
end
|
31
23
|
|
32
|
-
|
33
|
-
|
34
|
-
if $conf[:check] then
|
35
|
-
begin
|
36
|
-
JSON::Schema.validate(j, JSON.parse(File.read(schema)))
|
37
|
-
rescue
|
38
|
-
Bwkfanboy::Utils.errx(1, "JSON validation with schema (#{schema}) failed");
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
feed = RSS::Maker.make("atom") { |maker|
|
43
|
-
maker.channel.id = j['channel']['id']
|
44
|
-
maker.channel.updated = j['channel']['updated']
|
45
|
-
maker.channel.author = j['channel']['author']
|
46
|
-
maker.channel.title = j['channel']['title']
|
47
|
-
|
48
|
-
maker.channel.links.new_link {|i|
|
49
|
-
i.href = j['channel']['link']
|
50
|
-
i.rel = 'alternate'
|
51
|
-
i.type = 'text/html' # eh
|
52
|
-
}
|
53
|
-
|
54
|
-
maker.items.do_sort = true
|
55
|
-
|
56
|
-
j['x_entries'].each { |i|
|
57
|
-
maker.items.new_item do |item|
|
58
|
-
item.links.new_link {|k|
|
59
|
-
k.href = i['link']
|
60
|
-
k.rel = 'alternate'
|
61
|
-
k.type = 'text/html' # only to make happy crappy pr2nntp gateway
|
62
|
-
}
|
63
|
-
item.title = i['title']
|
64
|
-
item.author = i['author']
|
65
|
-
item.updated = i['updated']
|
66
|
-
item.content.type = j['channel']['x_entries_content_type']
|
67
|
-
|
68
|
-
case item.content.type
|
69
|
-
when 'text'
|
70
|
-
item.content.content = i['content']
|
71
|
-
when 'html'
|
72
|
-
item.content.content = i['content']
|
73
|
-
else
|
74
|
-
item.content.xhtml = i['content']
|
75
|
-
end
|
76
|
-
end
|
77
|
-
}
|
78
|
-
}
|
79
|
-
|
80
|
-
puts feed
|
24
|
+
if $conf[:check] then Bwkfanboy::Generate.validate(j) end
|
25
|
+
puts Bwkfanboy::Generate.atom(j)
|
data/bin/bwkfanboy_parse
CHANGED
@@ -1,12 +1,6 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# -*-ruby-*-
|
3
3
|
|
4
|
-
# Take 1 command line parameter: a full path to a plugin.
|
5
|
-
#
|
6
|
-
# Read stdin for a HTML, parse it and print the result to stdout in JSON
|
7
|
-
# format. If '-vv' command line parameters were given, output will be in
|
8
|
-
# 'key: value' pairs and <em>not</em> in JSON.
|
9
|
-
|
10
4
|
require_relative '../lib/bwkfanboy/parser'
|
11
5
|
|
12
6
|
$conf = {
|
@@ -19,12 +13,12 @@ if ARGV.size == 0 then
|
|
19
13
|
abort($conf[:banner])
|
20
14
|
else
|
21
15
|
Bwkfanboy::Utils.plugin_load(ARGV[0], Bwkfanboy::Meta::PLUGIN_CLASS)
|
22
|
-
end
|
16
|
+
end
|
23
17
|
|
24
18
|
opt = Bwkfanboy::Utils.plugin_opts(ARGV)
|
25
19
|
pn = Page.new(opt)
|
26
20
|
pn.check()
|
27
|
-
pn.parse()
|
21
|
+
pn.parse(STDIN)
|
28
22
|
|
29
23
|
if Bwkfanboy::Utils.cfg[:verbose] >= 2 then
|
30
24
|
pn.dump()
|
data/bin/bwkfanboy_server
CHANGED
@@ -1,42 +1,9 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# -*-ruby-*-
|
3
3
|
|
4
|
-
# Start a HTTP server (by default on 127.0.0.1:9042). To get Atom feeds
|
5
|
-
# from it, initiate GET request with URI
|
6
|
-
#
|
7
|
-
# http://localhost:9042/?p=PLUGIN
|
8
|
-
#
|
9
|
-
# where +PLUGIN+ is a name of a bwkfanboy's plugin (without '.re' suffix).
|
10
|
-
# If the plugin requires additional options you can specify them like:
|
11
|
-
#
|
12
|
-
# http://localhost:9042/?p=PLUGIN&o=opt1%20%22opt2%20has%20spaces%22
|
13
|
-
#
|
14
|
-
# where <tt>opt1%20%22opt2%20has%20spaces%22</tt> is a encoded string
|
15
|
-
# <tt>opt1 "opt2 has spaces"</tt>.
|
16
|
-
#
|
17
|
-
# To list all available plugins, point you browser to
|
18
|
-
#
|
19
|
-
# http://localhost:9042/list
|
20
|
-
#
|
21
|
-
# The server is intended to run from a non-root user from
|
22
|
-
# <tt>~/.login</tt> file. It can detach from a terminal if you give it
|
23
|
-
# '-d' command line option.
|
24
|
-
#
|
25
|
-
# For other help, type:
|
26
|
-
#
|
27
|
-
# bwkfanboy_server -h
|
28
|
-
#
|
29
|
-
# The server maintains 2 logs:
|
30
|
-
#
|
31
|
-
# /tmp/bwkfanboy/USER/log/bwkfanboy_server.log
|
32
|
-
# /tmp/bwkfanboy/USER/log/bwkfanboy_server-access.log
|
33
|
-
#
|
34
|
-
# The file with a pid:
|
35
|
-
#
|
36
|
-
# /tmp/bwkfanboy/USER/bwkfanboy_server.pid
|
37
|
-
|
38
4
|
require 'shellwords'
|
39
5
|
require 'webrick'
|
6
|
+
require 'date'
|
40
7
|
require_relative '../lib/bwkfanboy/utils'
|
41
8
|
|
42
9
|
$conf = {
|
@@ -78,8 +45,14 @@ class FeedServlet < WEBrick::HTTPServlet::AbstractServlet # :nodoc: all
|
|
78
45
|
if r[0] != 0 then
|
79
46
|
raise WEBrick::HTTPStatus::InternalServerError.new("Errors in the pipeline:\n\n #{r[1]}")
|
80
47
|
end
|
81
|
-
|
48
|
+
|
82
49
|
res.body = r[2]
|
50
|
+
|
51
|
+
# search for <updated> tag and set Last-Modified header
|
52
|
+
if (m = r[2].match('<updated>(.+?)</updated>'))
|
53
|
+
res['Last-Modified'] = DateTime.parse(m.to_s).httpdate
|
54
|
+
end
|
55
|
+
|
83
56
|
else
|
84
57
|
raise WEBrick::HTTPStatus::InternalServerError.new("Parameter 'p' required")
|
85
58
|
end
|
data/doc/NEWS.rdoc
CHANGED
@@ -1,6 +1,20 @@
|
|
1
|
-
===
|
1
|
+
=== 1.1.4
|
2
2
|
|
3
|
-
-
|
3
|
+
- INCOMPATIBILITY: from now on, all plugins must NOT read the stdin
|
4
|
+
but bwkfanboy's provided stream. See doc/plugin.rdoc.
|
5
|
+
|
6
|
+
- Moved the code from bwkfanboy_* to libraries.
|
7
|
+
|
8
|
+
- From now on, bwkfanboy util by default does not run utils in a pipe but
|
9
|
+
uses libraries directly. One can restore the old functionality with
|
10
|
+
'-O' CL switch.
|
11
|
+
|
12
|
+
=== 0.1.4
|
13
|
+
|
14
|
+
- The minimum required Ruby version is 1.9.2.
|
15
|
+
- bwkfanboy_server now inserts a Last-Modified header.
|
16
|
+
- Fixed (?) a bug in bwkfanboy_generate with external encoding.
|
17
|
+
- Updated tests for Ruby 1.9.2.
|
4
18
|
|
5
19
|
=== 0.1.3
|
6
20
|
|
data/doc/README.rdoc
CHANGED
@@ -11,6 +11,10 @@ general assistance.
|
|
11
11
|
than the whole gem on rubygems.org, so grab the source before
|
12
12
|
struggling).
|
13
13
|
|
14
|
+
Plugins from version 1.1.4 are *incompatible* with the 0.1.x
|
15
|
+
series. Please reread in doc/plugin.rdoc the example of the skeleton
|
16
|
+
plugin.
|
17
|
+
|
14
18
|
= Architecture
|
15
19
|
|
16
20
|
== Plugins
|
@@ -18,10 +22,10 @@ struggling).
|
|
18
22
|
bwkfanboy comes with several plugins. One of them, for example, parses a
|
19
23
|
search page of dailyprincetonian.com looking for bwk's articles.
|
20
24
|
|
21
|
-
The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse
|
22
|
-
|
25
|
+
The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse parent,
|
26
|
+
overriding 1 method.
|
23
27
|
|
24
|
-
|
28
|
+
Plugins can be in system
|
25
29
|
|
26
30
|
`gem env gemdir`/gems/bwkfanboy-x.y.z/lib/bwkfanboy/plugins
|
27
31
|
|
@@ -89,7 +93,7 @@ There are 2 method to get an Atom feed via HTTP:
|
|
89
93
|
|
90
94
|
2. Small *bwkfanboy_server* HTTP server. It can run from any user and
|
91
95
|
thus is able to inherit env variables for discovering your HOME
|
92
|
-
directory. Read
|
96
|
+
directory. Read doc/bwkfanboy_server.rdoc to know how to operate it.
|
93
97
|
|
94
98
|
= License
|
95
99
|
:include: doc/LICENSE
|
@@ -0,0 +1,7 @@
|
|
1
|
+
= bwkfanboy_parse
|
2
|
+
|
3
|
+
Takes 1 command line parameter: a full path to a plugin.
|
4
|
+
|
5
|
+
Reads stdin for a HTML, parses it and prints the result to stdout in
|
6
|
+
JSON format. If '-vv' command line parameters were given, output will
|
7
|
+
be in 'key: value' pairs and <em>not</em> in JSON.
|
@@ -0,0 +1,35 @@
|
|
1
|
+
= bwkfanboy_server
|
2
|
+
|
3
|
+
Start a HTTP server (by default on 127.0.0.1:9042). To get Atom feeds
|
4
|
+
from it, initiate GET request with URI
|
5
|
+
|
6
|
+
http://localhost:9042/?p=PLUGIN
|
7
|
+
|
8
|
+
where +PLUGIN+ is a name of a bwkfanboy's plugin (without '.rb' suffix).
|
9
|
+
If the plugin requires additional options you can specify them like:
|
10
|
+
|
11
|
+
http://localhost:9042/?p=PLUGIN&o=opt1%20%22opt2%20has%20spaces%22
|
12
|
+
|
13
|
+
where <tt>opt1%20%22opt2%20has%20spaces%22</tt> is an encoded string
|
14
|
+
<tt>opt1 "opt2 has spaces"</tt>.
|
15
|
+
|
16
|
+
To list all available plugins, point your browser to
|
17
|
+
|
18
|
+
http://localhost:9042/list
|
19
|
+
|
20
|
+
The server is intended to run from a non-root user from
|
21
|
+
<tt>~/.login</tt> file. It can detach from a terminal if you give it
|
22
|
+
'-d' command line option.
|
23
|
+
|
24
|
+
For other help, type:
|
25
|
+
|
26
|
+
bwkfanboy_server -h
|
27
|
+
|
28
|
+
The server maintains 2 logs:
|
29
|
+
|
30
|
+
/tmp/bwkfanboy/USER/log/bwkfanboy_server.log
|
31
|
+
/tmp/bwkfanboy/USER/log/bwkfanboy_server-access.log
|
32
|
+
|
33
|
+
The file with a pid:
|
34
|
+
|
35
|
+
/tmp/bwkfanboy/USER/bwkfanboy_server.pid
|
data/doc/plugin.rdoc
CHANGED
@@ -1,12 +1,11 @@
|
|
1
|
-
=
|
1
|
+
= How to Write a \Plugin
|
2
2
|
|
3
3
|
First of all, look at examples provided with bwkfanboy. They were
|
4
4
|
intended to be 100% working because I was writing them for myself.
|
5
5
|
|
6
|
-
Basically, all you need is to write a class named _Page_ that
|
7
|
-
|
8
|
-
|
9
|
-
class.
|
6
|
+
Basically, all you need is to write a class named _Page_ that inherits
|
7
|
+
class Bwkfanboy::Parse, override in the child \#myparse method and write
|
8
|
+
a simple module named _Meta_ inside your _Page_ class.
|
10
9
|
|
11
10
|
== Skeleton
|
12
11
|
|
@@ -24,9 +23,9 @@ Here is a skeleton of a plugin:
|
|
24
23
|
CONTENT_TYPE = 'html'
|
25
24
|
end
|
26
25
|
|
27
|
-
def myparse()
|
28
|
-
# read
|
29
|
-
doc = Nokogiri::HTML(
|
26
|
+
def myparse(stream)
|
27
|
+
# read 'stream' IO object and parse it
|
28
|
+
doc = Nokogiri::HTML(stream, nil, Meta::ENC)
|
30
29
|
doc.xpath("XPATH QUERY").each {|i|
|
31
30
|
t = clean(i.xpath("XPATH QUERY").text())
|
32
31
|
l = clean(i.xpath("XPATH QUERY").text())
|
@@ -52,8 +51,8 @@ it should.
|
|
52
51
|
|
53
52
|
=== \Meta
|
54
53
|
|
55
|
-
Module _Meta_ can have
|
56
|
-
|
54
|
+
Module _Meta_ can only have constants--and *all* constants listed in the
|
55
|
+
skeleton are mandatory.
|
57
56
|
|
58
57
|
* <tt>URI</tt>--can be a <tt>http(s)://</tt> or <tt>ftp://</tt> URL
|
59
58
|
or just a path to a file on your local machine, as
|
@@ -75,13 +74,17 @@ the skeleton are required.
|
|
75
74
|
|
76
75
|
=== myparse
|
77
76
|
|
78
|
-
In
|
79
|
-
HTML you want to parse. The general idea:
|
77
|
+
In \#myparse method read 'stream' IO object. The contents of it is the
|
78
|
+
raw HTML you want to parse. The general idea:
|
80
79
|
|
81
|
-
* Atom feed must contain at least 1 entry, so look in HTML for some
|
82
|
-
crap which you break into 5 peaces:
|
83
|
-
|
84
|
-
|
80
|
+
* Atom feed must contain at least 1 entry, so look in the HTML for some
|
81
|
+
crap which you must break into 5 pieces:
|
82
|
+
|
83
|
+
- a title of the entry
|
84
|
+
- a link for it
|
85
|
+
- a date for the entry
|
86
|
+
- who is the author of the entry and
|
87
|
+
- its contents.
|
85
88
|
|
86
89
|
* After you scan and grab 1 entry, create a hash and add it to
|
87
90
|
_self_ as it was in the skeleton:
|
@@ -89,20 +92,20 @@ HTML you want to parse. The general idea:
|
|
89
92
|
self << { title: t, link: l, updated: u, author: a, content: c }
|
90
93
|
|
91
94
|
Here variables _t_, _l_, _u_, _a_ and _c_ contain the actual
|
92
|
-
|
95
|
+
value of 5 pieces for the entry. Names of the keys in the hash are
|
93
96
|
important of course--don't invent your own.
|
94
97
|
|
95
|
-
*
|
98
|
+
* Probably there would be more crap in the HTML that you can use to
|
96
99
|
construct another entry. Keep parsing and adding entries.
|
97
100
|
|
98
|
-
* While you scanning, use the 2 helper methods for cleaning each
|
99
|
-
peace: \#clean, which removed duplicate spaces and
|
100
|
-
|
101
|
-
|
101
|
+
* While you're scanning, use the 2 helper methods for cleaning each
|
102
|
+
piece: \#clean, which removes duplicate spaces, and \#date, which parses
|
103
|
+
a string and returns a date in ISO8601 format. You may override \#date
|
104
|
+
method if you like.
|
102
105
|
|
103
106
|
=== Options
|
104
107
|
|
105
|
-
Plugins can have _options_ and a user should provide
|
108
|
+
Plugins can have _options_ and a user should provide them to the plugin
|
106
109
|
in the real-time. For example, say you're scraping a site where many
|
107
110
|
users are wasting their time. If you want to watch for several of them
|
108
111
|
it is silly to write a new plugin every time for a new
|
@@ -130,20 +133,20 @@ _option_ becomes mandatory for the end-user.
|
|
130
133
|
To test how nice your plugin works, save the html page to the file
|
131
134
|
and type:
|
132
135
|
|
133
|
-
% bwkfanboy_parse -vv path/to/
|
136
|
+
% bwkfanboy_parse -vv /path/to/the/plugin.rb < saved_page.html
|
134
137
|
|
135
138
|
to see the result in plain text, or
|
136
139
|
|
137
|
-
% bwkfanboy_parse -v path/to/
|
140
|
+
% bwkfanboy_parse -v /path/to/the/plugin.rb < saved_page.html
|
138
141
|
|
139
|
-
as pretty JSON.
|
142
|
+
as a pretty JSON.
|
140
143
|
|
141
144
|
For option-enabled plugins, supply additional parameters for them after
|
142
145
|
the plugin path:
|
143
146
|
|
144
|
-
% bwkfanboy_parse -vv path/to/
|
145
|
-
option_1 "
|
147
|
+
% bwkfanboy_parse -vv /path/to/the/plugin.rb \
|
148
|
+
option_1 "option 2" < saved_page.html
|
146
149
|
|
147
150
|
<tt>bwkfanboy_parse</tt> returns 0 if no errors occurred or >= 1 if you
|
148
151
|
have errors in your plugin code. N.B.: the output from
|
149
|
-
<tt>bwkparser_parse</tt>
|
152
|
+
<tt>bwkfanboy_parse</tt> *must* always be in UTF-8.
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
|
3
|
+
require_relative 'utils'
|
4
|
+
|
5
|
+
module Bwkfanboy
|
6
|
+
class Fetch
|
7
|
+
|
8
|
+
# If no block given, return contents of fetch'ed URI. Otherwise,
|
9
|
+
# execute the block with 1 parameter--stream.
|
10
|
+
def self.cat(uri)
|
11
|
+
uri.chomp!
|
12
|
+
|
13
|
+
Bwkfanboy::Utils.veputs(1, "fetching #{uri}\n")
|
14
|
+
|
15
|
+
begin
|
16
|
+
open(uri, "User-Agent" => Bwkfanboy::Meta::USER_AGENT) {|f|
|
17
|
+
if defined?(f.meta) && f.status[0] != '200' then
|
18
|
+
Bwkfanboy::Utils.errx(1, "cannot fetch #{uri} : HTTP responce: #{f.status[0]}")
|
19
|
+
end
|
20
|
+
Bwkfanboy::Utils.veputs(1, "charset = #{f.content_type_parse[1][1]}\n") if defined?(f.meta)
|
21
|
+
if block_given?
|
22
|
+
yield f
|
23
|
+
else
|
24
|
+
return f.read
|
25
|
+
end
|
26
|
+
}
|
27
|
+
rescue
|
28
|
+
# typically Errno::ENOENT
|
29
|
+
Bwkfanboy::Utils.errx(1, "cannot fetch: #{$!}");
|
30
|
+
end
|
31
|
+
|
32
|
+
return ""
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'rss/maker'
|
2
|
+
require 'date'
|
3
|
+
require 'json'
|
4
|
+
require 'jsonschema'
|
5
|
+
|
6
|
+
require_relative 'utils'
|
7
|
+
|
8
|
+
module Bwkfanboy
|
9
|
+
class Generate
|
10
|
+
|
11
|
+
def self.validate(t)
|
12
|
+
schema = Bwkfanboy::Utils.gem_dir_system() + '/schema.js'
|
13
|
+
begin
|
14
|
+
JSON::Schema.validate(t, JSON.parse(File.read(schema)))
|
15
|
+
rescue
|
16
|
+
Bwkfanboy::Utils.errx(1, "JSON validation with schema (#{schema}) failed");
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.atom(src)
|
21
|
+
feed = RSS::Maker.make("atom") { |maker|
|
22
|
+
maker.channel.id = src['channel']['id']
|
23
|
+
maker.channel.updated = src['channel']['updated']
|
24
|
+
maker.channel.author = src['channel']['author']
|
25
|
+
maker.channel.title = src['channel']['title']
|
26
|
+
|
27
|
+
maker.channel.links.new_link {|i|
|
28
|
+
i.href = src['channel']['link']
|
29
|
+
i.rel = 'alternate'
|
30
|
+
i.type = 'text/html' # eh
|
31
|
+
}
|
32
|
+
|
33
|
+
maker.items.do_sort = true
|
34
|
+
|
35
|
+
src['x_entries'].each { |i|
|
36
|
+
maker.items.new_item do |item|
|
37
|
+
item.links.new_link {|k|
|
38
|
+
k.href = i['link']
|
39
|
+
k.rel = 'alternate'
|
40
|
+
k.type = 'text/html' # only to make happy crappy pr2nntp gateway
|
41
|
+
}
|
42
|
+
item.title = i['title']
|
43
|
+
item.author = i['author']
|
44
|
+
item.updated = i['updated']
|
45
|
+
item.content.type = src['channel']['x_entries_content_type']
|
46
|
+
|
47
|
+
case item.content.type
|
48
|
+
when 'text'
|
49
|
+
item.content.content = i['content']
|
50
|
+
when 'html'
|
51
|
+
item.content.content = i['content']
|
52
|
+
else
|
53
|
+
item.content.xhtml = i['content']
|
54
|
+
end
|
55
|
+
end
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
return feed
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
data/lib/bwkfanboy/parser.rb
CHANGED
@@ -8,7 +8,7 @@ module Bwkfanboy
|
|
8
8
|
|
9
9
|
# :include: ../../doc/plugin.rdoc
|
10
10
|
class Parse
|
11
|
-
ENTRIES_MAX =
|
11
|
+
ENTRIES_MAX = 128
|
12
12
|
|
13
13
|
attr_reader :opt
|
14
14
|
|
@@ -18,10 +18,10 @@ module Bwkfanboy
|
|
18
18
|
end
|
19
19
|
|
20
20
|
# Invokes #myparse & checks if it has grabbed something.
|
21
|
-
def parse()
|
21
|
+
def parse(stream)
|
22
22
|
@entries = []
|
23
23
|
begin
|
24
|
-
myparse()
|
24
|
+
myparse(stream)
|
25
25
|
rescue
|
26
26
|
@entries = []
|
27
27
|
Utils.errx(1, "parsing failed: #{$!}\n\nBacktrace:\n\n#{$!.backtrace.join("\n")}")
|
@@ -99,7 +99,7 @@ module Bwkfanboy
|
|
99
99
|
protected
|
100
100
|
|
101
101
|
# This *must* be overridden in the child.
|
102
|
-
def myparse()
|
102
|
+
def myparse(stream)
|
103
103
|
raise "plugin isn't finished yet"
|
104
104
|
end
|
105
105
|
|
@@ -8,16 +8,16 @@ class Page < Bwkfanboy::Parse
|
|
8
8
|
URI = 'http://www.dailyprincetonian.com/advanced_search/?author=Brian+Kernighan'
|
9
9
|
URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/bwk.html'
|
10
10
|
ENC = 'UTF-8'
|
11
|
-
VERSION =
|
11
|
+
VERSION = 2
|
12
12
|
COPYRIGHT = "See bwkfanboy's LICENSE file"
|
13
13
|
TITLE = "Brian Kernighan's articles from Daily Princetonian"
|
14
14
|
CONTENT_TYPE = 'html'
|
15
15
|
end
|
16
16
|
|
17
|
-
def myparse()
|
17
|
+
def myparse(stream)
|
18
18
|
url = "http://www.dailyprincetonian.com"
|
19
19
|
|
20
|
-
doc = Nokogiri::HTML(
|
20
|
+
doc = Nokogiri::HTML(stream, nil, Meta::ENC)
|
21
21
|
doc.xpath("//div[@class='article_item']").each {|i|
|
22
22
|
t = clean(i.xpath("h2/a").children.text())
|
23
23
|
fail 'unable to extract link' if (link = clean(i.xpath("h2/a")[0].attributes['href'].value()).empty?)
|
@@ -5,7 +5,7 @@ class Page < Bwkfanboy::Parse
|
|
5
5
|
URI = '/usr/ports/UPDATING'
|
6
6
|
URI_DEBUG = URI
|
7
7
|
ENC = 'ASCII'
|
8
|
-
VERSION =
|
8
|
+
VERSION = 2
|
9
9
|
COPYRIGHT = "See bwkfanboy's LICENSE file"
|
10
10
|
TITLE = "News from FreeBSD ports"
|
11
11
|
CONTENT_TYPE = 'text'
|
@@ -24,7 +24,7 @@ class Page < Bwkfanboy::Parse
|
|
24
24
|
return t
|
25
25
|
end
|
26
26
|
|
27
|
-
def myparse()
|
27
|
+
def myparse(stream)
|
28
28
|
re_u = /^(\d{8}):$/
|
29
29
|
re_t1 = /^ {2}AFFECTS:\s+(.+)$/
|
30
30
|
re_t2 = /^\s+(.+)$/
|
@@ -33,7 +33,7 @@ class Page < Bwkfanboy::Parse
|
|
33
33
|
ready = false
|
34
34
|
mode = nil
|
35
35
|
t = l = u = a = c = nil
|
36
|
-
while line =
|
36
|
+
while line = stream.gets
|
37
37
|
line.rstrip!
|
38
38
|
|
39
39
|
if line =~ re_u then
|
@@ -17,17 +17,17 @@ class Page < Bwkfanboy::Parse
|
|
17
17
|
URI = 'http://www.quora.com/#{opt[0]}/answers'
|
18
18
|
URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/quora.html'
|
19
19
|
ENC = 'UTF-8'
|
20
|
-
VERSION =
|
20
|
+
VERSION = 5
|
21
21
|
COPYRIGHT = "See bwkfanboy's LICENSE file"
|
22
22
|
TITLE = "Last n answers (per-user) from Quora; requires nodejs"
|
23
23
|
CONTENT_TYPE = 'html'
|
24
24
|
end
|
25
25
|
|
26
|
-
def myparse()
|
26
|
+
def myparse(stream)
|
27
27
|
profile = opt[0] # for example, 'Brandon-Smietana'
|
28
28
|
|
29
29
|
# read the 'stream' IO object
|
30
|
-
doc = Nokogiri::HTML(
|
30
|
+
doc = Nokogiri::HTML(stream, nil, Meta::ENC)
|
31
31
|
|
32
32
|
# extract & evaluate JavaScript into tstp
|
33
33
|
tstp = nil
|
data/lib/bwkfanboy/utils.rb
CHANGED
@@ -7,7 +7,7 @@ require 'active_support/core_ext/module/attribute_accessors'
|
|
7
7
|
module Bwkfanboy
|
8
8
|
module Meta
|
9
9
|
NAME = 'bwkfanboy'
|
10
|
-
VERSION = '
|
10
|
+
VERSION = '1.1.4'
|
11
11
|
USER_AGENT = "#{NAME}/#{VERSION} (#{RUBY_PLATFORM}; N; #{Encoding.default_external.name}; #{RUBY_ENGINE}; rv:#{RUBY_VERSION}.#{RUBY_PATCHLEVEL})"
|
12
12
|
PLUGIN_CLASS = 'Page'
|
13
13
|
DIR_TMP = "/tmp/#{Meta::NAME}/#{ENV['USER']}"
|
@@ -89,7 +89,7 @@ module Bwkfanboy
|
|
89
89
|
# TODO get rid of eval()
|
90
90
|
fail "class #{class_name} isn't defined" if (! eval("defined?#{class_name}") || ! eval(class_name).is_a?(Class) )
|
91
91
|
rescue LoadError
|
92
|
-
errx(1, "cannot load plugin '#{path}'");
|
92
|
+
errx(1, "cannot load plugin '#{path}' #{$!}");
|
93
93
|
rescue Exception
|
94
94
|
errx(1, "plugin '#{path}' has errors: #{$!}\n\nBacktrace:\n\n#{$!.backtrace.join("\n")}")
|
95
95
|
end
|
data/test/test_fetch.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
|
-
#!/usr/bin/env ruby19
|
2
|
-
|
3
|
-
require 'minitest/autorun'
|
4
1
|
require 'digest/md5'
|
5
2
|
|
6
3
|
require_relative '../lib/bwkfanboy/utils'
|
7
|
-
require_relative 'ts_utils
|
4
|
+
require_relative 'ts_utils'
|
8
5
|
|
9
6
|
# TODO add HTTP 404 check; drop connection from server during HTTP 200
|
10
7
|
# replay...
|
data/test/test_generate.rb
CHANGED
data/test/test_parse.rb
CHANGED
@@ -1,6 +1,3 @@
|
|
1
|
-
#!/usr/bin/env ruby19
|
2
|
-
|
3
|
-
require 'minitest/autorun'
|
4
1
|
require 'digest/md5'
|
5
2
|
|
6
3
|
require_relative '../lib/bwkfanboy/utils'
|
@@ -17,16 +14,16 @@ class TestParse < MiniTest::Unit::TestCase
|
|
17
14
|
|
18
15
|
def test_empty_plugin
|
19
16
|
cmd CMD
|
20
|
-
r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{@tpath}plugins/empty.rb ")
|
17
|
+
r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{Dir.pwd}/#{@tpath}plugins/empty.rb ")
|
21
18
|
assert_equal(1, r[0])
|
22
19
|
assert_match(/plugin .+ has errors: class Page isn't defined/, r[1])
|
23
20
|
end
|
24
21
|
|
25
22
|
def test_plugin_parse
|
26
23
|
cmd CMD
|
27
|
-
r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{@tpath}plugins/bwk.rb < #{@tpath}semis/bwk.html")
|
24
|
+
r = Bwkfanboy::Utils.cmd_run("#{cmd CMD} #{Dir.pwd}/#{@tpath}../lib/bwkfanboy/plugins/bwk.rb < #{@tpath}semis/bwk.html")
|
28
25
|
assert_equal(0, r[0])
|
29
|
-
# bin/bwkfanboy_parse
|
30
|
-
assert_equal('
|
26
|
+
# bin/bwkfanboy_parse `pwd`/lib/bwkfanboy/plugins/bwk.rb < test/semis/bwk.html | md5
|
27
|
+
assert_equal('a433a4a27bafb060a41aa85a40808056', Digest::MD5.hexdigest(r[2]))
|
31
28
|
end
|
32
29
|
end
|
data/test/test_server.rb
CHANGED
data/test/ts_utils.rb
CHANGED
metadata
CHANGED
@@ -3,10 +3,10 @@ name: bwkfanboy
|
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
|
-
- 0
|
7
6
|
- 1
|
8
|
-
-
|
9
|
-
|
7
|
+
- 1
|
8
|
+
- 4
|
9
|
+
version: 1.1.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Alexander Gromnitsky
|
@@ -14,27 +14,29 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-11-08 00:00:00 +02:00
|
18
18
|
default_executable: bwkfanboy
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: activesupport
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
24
25
|
requirements:
|
25
26
|
- - ">="
|
26
27
|
- !ruby/object:Gem::Version
|
27
28
|
segments:
|
28
29
|
- 3
|
29
30
|
- 0
|
30
|
-
-
|
31
|
-
version: 3.0.
|
31
|
+
- 1
|
32
|
+
version: 3.0.1
|
32
33
|
type: :runtime
|
33
34
|
version_requirements: *id001
|
34
35
|
- !ruby/object:Gem::Dependency
|
35
36
|
name: nokogiri
|
36
37
|
prerelease: false
|
37
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
38
40
|
requirements:
|
39
41
|
- - ">="
|
40
42
|
- !ruby/object:Gem::Version
|
@@ -49,6 +51,7 @@ dependencies:
|
|
49
51
|
name: open4
|
50
52
|
prerelease: false
|
51
53
|
requirement: &id003 !ruby/object:Gem::Requirement
|
54
|
+
none: false
|
52
55
|
requirements:
|
53
56
|
- - ">="
|
54
57
|
- !ruby/object:Gem::Version
|
@@ -63,6 +66,7 @@ dependencies:
|
|
63
66
|
name: jsonschema
|
64
67
|
prerelease: false
|
65
68
|
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
66
70
|
requirements:
|
67
71
|
- - ">="
|
68
72
|
- !ruby/object:Gem::Version
|
@@ -84,15 +88,14 @@ executables:
|
|
84
88
|
extensions: []
|
85
89
|
|
86
90
|
extra_rdoc_files:
|
87
|
-
- bin/bwkfanboy_generate
|
88
|
-
- bin/bwkfanboy_parse
|
89
|
-
- bin/bwkfanboy
|
90
|
-
- bin/bwkfanboy_server
|
91
|
-
- bin/bwkfanboy_fetch
|
92
|
-
- doc/plugin.rdoc
|
93
|
-
- doc/README.rdoc
|
94
91
|
- doc/LICENSE
|
95
92
|
- doc/NEWS.rdoc
|
93
|
+
- doc/README.rdoc
|
94
|
+
- doc/plugin.rdoc
|
95
|
+
- doc/bwkfanboy_fetch.rdoc
|
96
|
+
- doc/bwkfanboy_generate.rdoc
|
97
|
+
- doc/bwkfanboy_parse.rdoc
|
98
|
+
- doc/bwkfanboy_server.rdoc
|
96
99
|
files:
|
97
100
|
- lib/bwkfanboy/plugins/bwk.rb
|
98
101
|
- lib/bwkfanboy/plugins/freebsd-ports-update.rb
|
@@ -101,20 +104,25 @@ files:
|
|
101
104
|
- lib/bwkfanboy/parser.rb
|
102
105
|
- lib/bwkfanboy/utils.rb
|
103
106
|
- lib/bwkfanboy/schema.js
|
107
|
+
- lib/bwkfanboy/fetch.rb
|
108
|
+
- lib/bwkfanboy/generate.rb
|
104
109
|
- bin/bwkfanboy_generate
|
105
110
|
- bin/bwkfanboy_parse
|
106
111
|
- bin/bwkfanboy
|
107
112
|
- bin/bwkfanboy_server
|
108
113
|
- bin/bwkfanboy_fetch
|
109
|
-
- doc/plugin.rdoc
|
110
|
-
- doc/README.rdoc
|
111
114
|
- doc/LICENSE
|
112
115
|
- doc/NEWS.rdoc
|
116
|
+
- doc/README.rdoc
|
117
|
+
- doc/plugin.rdoc
|
118
|
+
- doc/bwkfanboy_fetch.rdoc
|
119
|
+
- doc/bwkfanboy_generate.rdoc
|
120
|
+
- doc/bwkfanboy_parse.rdoc
|
121
|
+
- doc/bwkfanboy_server.rdoc
|
113
122
|
- README.rdoc
|
114
123
|
- Rakefile
|
115
124
|
- TODO
|
116
125
|
- test/plugins/empty.rb
|
117
|
-
- test/plugins/bwk.rb
|
118
126
|
- test/semis/bwk.html
|
119
127
|
- test/semis/bwk.json
|
120
128
|
- test/semis/quora.html
|
@@ -134,20 +142,23 @@ licenses: []
|
|
134
142
|
post_install_message:
|
135
143
|
rdoc_options:
|
136
144
|
- -m
|
137
|
-
-
|
145
|
+
- doc/README.rdoc
|
138
146
|
- -x
|
139
147
|
- plugins
|
140
148
|
require_paths:
|
141
149
|
- lib
|
142
150
|
required_ruby_version: !ruby/object:Gem::Requirement
|
151
|
+
none: false
|
143
152
|
requirements:
|
144
153
|
- - ">="
|
145
154
|
- !ruby/object:Gem::Version
|
146
155
|
segments:
|
147
156
|
- 1
|
148
157
|
- 9
|
149
|
-
|
158
|
+
- 2
|
159
|
+
version: 1.9.2
|
150
160
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
161
|
+
none: false
|
151
162
|
requirements:
|
152
163
|
- - ">="
|
153
164
|
- !ruby/object:Gem::Version
|
@@ -157,7 +168,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
157
168
|
requirements: []
|
158
169
|
|
159
170
|
rubyforge_project:
|
160
|
-
rubygems_version: 1.3.
|
171
|
+
rubygems_version: 1.3.7
|
161
172
|
signing_key:
|
162
173
|
specification_version: 3
|
163
174
|
summary: A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.
|
data/test/plugins/bwk.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
require 'nokogiri'
|
2
|
-
|
3
|
-
class Page < Bwkfanboy::Parse
|
4
|
-
module Meta
|
5
|
-
URI = "html/bwk.html"
|
6
|
-
ENC = 'UTF-8'
|
7
|
-
VERSION = 1
|
8
|
-
COPYRIGHT = '(c) 2010 Alexander Gromnitsky'
|
9
|
-
TITLE = "Brian Kernighan's articles from Daily Princetonian"
|
10
|
-
CONTENT_TYPE = 'html'
|
11
|
-
end
|
12
|
-
|
13
|
-
def myparse()
|
14
|
-
url = "http://www.dailyprincetonian.com"
|
15
|
-
|
16
|
-
doc = Nokogiri::HTML(STDIN, nil, Meta::ENC)
|
17
|
-
doc.xpath("//div[@class='article_item']").each {|i|
|
18
|
-
t = clean(i.xpath("h2/a").children.text())
|
19
|
-
fail 'unable to extract link' if (link = clean(i.xpath("h2/a")[0].attributes['href'].value()).empty?)
|
20
|
-
link = clean(i.xpath("h2/a")[0].attributes['href'].value())
|
21
|
-
l = url + link + "print"
|
22
|
-
u = date(i.xpath("h2").children[1].text())
|
23
|
-
a = clean(i.xpath("div/span/a[1]").children.text())
|
24
|
-
c = clean(i.xpath("div[@class='summary']").text())
|
25
|
-
|
26
|
-
self << { title: t, link: l, updated: u, author: a, content: c }
|
27
|
-
}
|
28
|
-
end
|
29
|
-
end
|