bwkfanboy 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +9 -2
- data/Rakefile +1 -1
- data/TODO +0 -3
- data/doc/NEWS.rdoc +4 -0
- data/doc/README.rdoc +9 -2
- data/doc/plugin.rdoc +14 -14
- data/lib/bwkfanboy/plugins/bwk.rb +1 -1
- data/lib/bwkfanboy/plugins/freebsd-ports-update.rb +1 -1
- data/lib/bwkfanboy/plugins/quora.js +3 -0
- data/lib/bwkfanboy/plugins/quora.rb +7 -3
- data/lib/bwkfanboy/utils.rb +7 -4
- data/test/semis/quora.html +8 -11
- data/test/test_server.rb +2 -2
- metadata +5 -4
- /data/{LICENSE → doc/LICENSE} +0 -0
data/README.rdoc
CHANGED
@@ -7,12 +7,16 @@ The converter is not a magick tool: you'll need to write a plugin (in
|
|
7
7
|
Ruby) for each site you want to watch. bwkfanboy provides guidelines and
|
8
8
|
general assistance.
|
9
9
|
|
10
|
+
(Plugins included with bwkfanboy are usually updated more frequently
|
11
|
+
than the whole gem on rubygems.org, so grab the source before
|
12
|
+
struggling).
|
13
|
+
|
10
14
|
= Architecture
|
11
15
|
|
12
16
|
== Plugins
|
13
17
|
|
14
|
-
bwkfanboy comes with
|
15
|
-
dailyprincetonian.com looking for bwk's articles.
|
18
|
+
bwkfanboy comes with several plugins. One of them, for example, parses a
|
19
|
+
search page of dailyprincetonian.com looking for bwk's articles.
|
16
20
|
|
17
21
|
The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse
|
18
22
|
parent, overriding 1 method.
|
@@ -86,3 +90,6 @@ There are 2 method to get an Atom feed via HTTP:
|
|
86
90
|
2. Small *bwkfanboy_server* HTTP server. It can run from any user and
|
87
91
|
thus is able to inherit env variables for discovering your HOME
|
88
92
|
directory. Read bin/bwkfanboy_server to know how to operate it.
|
93
|
+
|
94
|
+
= License
|
95
|
+
:include: doc/LICENSE
|
data/Rakefile
CHANGED
@@ -9,7 +9,7 @@ require 'rake/testtask'
|
|
9
9
|
spec = Gem::Specification.new() {|i|
|
10
10
|
i.name = "bwkfanboy"
|
11
11
|
i.summary = 'A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.'
|
12
|
-
i.version = '0.1.
|
12
|
+
i.version = '0.1.2'
|
13
13
|
i.author = 'Alexander Gromnitsky'
|
14
14
|
i.email = 'alexander.gromnitsky@gmail.com'
|
15
15
|
i.homepage = 'http://github.com/gromnitsky/bwkfanboy'
|
data/TODO
CHANGED
data/doc/NEWS.rdoc
CHANGED
data/doc/README.rdoc
CHANGED
@@ -7,12 +7,16 @@ The converter is not a magick tool: you'll need to write a plugin (in
|
|
7
7
|
Ruby) for each site you want to watch. bwkfanboy provides guidelines and
|
8
8
|
general assistance.
|
9
9
|
|
10
|
+
(Plugins included with bwkfanboy are usually updated more frequently
|
11
|
+
than the whole gem on rubygems.org, so grab the source before
|
12
|
+
struggling).
|
13
|
+
|
10
14
|
= Architecture
|
11
15
|
|
12
16
|
== Plugins
|
13
17
|
|
14
|
-
bwkfanboy comes with
|
15
|
-
dailyprincetonian.com looking for bwk's articles.
|
18
|
+
bwkfanboy comes with several plugins. One of them, for example, parses a
|
19
|
+
search page of dailyprincetonian.com looking for bwk's articles.
|
16
20
|
|
17
21
|
The plugin is a Ruby class +Page+ that inherits Bwkfanboy::Parse
|
18
22
|
parent, overriding 1 method.
|
@@ -86,3 +90,6 @@ There are 2 method to get an Atom feed via HTTP:
|
|
86
90
|
2. Small *bwkfanboy_server* HTTP server. It can run from any user and
|
87
91
|
thus is able to inherit env variables for discovering your HOME
|
88
92
|
directory. Read bin/bwkfanboy_server to know how to operate it.
|
93
|
+
|
94
|
+
= License
|
95
|
+
:include: doc/LICENSE
|
data/doc/plugin.rdoc
CHANGED
@@ -102,28 +102,28 @@ HTML you want to parse. The general idea:
|
|
102
102
|
|
103
103
|
=== Options
|
104
104
|
|
105
|
-
Plugins can have _options_
|
106
|
-
the real-time. For example, say you're scraping a site where many
|
107
|
-
are wasting their time. If you want to watch for several of them
|
108
|
-
silly to write a new plugin every time for a new
|
109
|
-
you can write 1 plugin which have an _option_ to
|
110
|
-
user name, in this case).
|
105
|
+
Plugins can have _options_ and a user should provide them to the plugin
|
106
|
+
in real time. For example, say you're scraping a site where many
|
107
|
+
users are wasting their time. If you want to watch for several of them
|
108
|
+
it is silly to write a new plugin every time for a new
|
109
|
+
participant. Instead, you can write 1 plugin which has an _option_ to
|
110
|
+
take a parameter (a user name, in this case).
|
111
111
|
|
112
|
-
Options (if any) are always accessible via \#opt method which is just
|
113
|
-
attr_reader of
|
112
|
+
Options (if any) are always accessible via the \#opt method, which is just an
|
113
|
+
attr_reader of an array.
|
114
114
|
|
115
|
-
|
116
|
-
|
115
|
+
One can play a really interesting trick with the Meta::URI constant. It is
|
116
|
+
possible to make it dynamic, for example:
|
117
117
|
|
118
118
|
URI = 'http://www.quora.com/#{opt[0]}/answers'
|
119
119
|
|
120
|
-
Then, if user will provide 1 option (say 'Mark-Suster')--it will
|
121
|
-
in the final URI as follows:
|
120
|
+
Then, if a user provides 1 option (say 'Mark-Suster'), it will
|
121
|
+
appear in the final URI as follows:
|
122
122
|
|
123
123
|
http://www.quora.com/Mark-Suster/answers
|
124
124
|
|
125
|
-
Such dynamic is possible only for Meta::URI constant and
|
126
|
-
|
125
|
+
Such dynamic behavior is possible only for the Meta::URI constant and, in such a case,
|
126
|
+
_option_ becomes mandatory for the end-user.
|
127
127
|
|
128
128
|
== How to test all this
|
129
129
|
|
@@ -9,7 +9,7 @@ class Page < Bwkfanboy::Parse
|
|
9
9
|
URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/bwk.html'
|
10
10
|
ENC = 'UTF-8'
|
11
11
|
VERSION = 1
|
12
|
-
COPYRIGHT = '
|
12
|
+
COPYRIGHT = "See bwkfanboy's LICENSE file"
|
13
13
|
TITLE = "Brian Kernighan's articles from Daily Princetonian"
|
14
14
|
CONTENT_TYPE = 'html'
|
15
15
|
end
|
@@ -82,6 +82,9 @@ function prepare4eval(body) {
|
|
82
82
|
"function LoginSignal(args) { return arr(arguments) }\n" +
|
83
83
|
"function LiveLogin(args) { return arr(arguments) }\n" +
|
84
84
|
"function PresencePageMonitor(args) { return arr(arguments) }\n" +
|
85
|
+
"function UserSig(args) { return arr(arguments) }\n" +
|
86
|
+
"function HeaderLogo(args) { return arr(arguments) }\n" +
|
87
|
+
"function NavElement(args) { return arr(arguments) }\n" +
|
85
88
|
'';
|
86
89
|
var tail = "\n_components;\n";
|
87
90
|
|
@@ -17,9 +17,9 @@ class Page < Bwkfanboy::Parse
|
|
17
17
|
URI = 'http://www.quora.com/#{opt[0]}/answers'
|
18
18
|
URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/quora.html'
|
19
19
|
ENC = 'UTF-8'
|
20
|
-
VERSION =
|
20
|
+
VERSION = 3
|
21
21
|
COPYRIGHT = "See bwkfanboy's LICENSE file"
|
22
|
-
TITLE = "Last n answers (per-user) from Quora
|
22
|
+
TITLE = "Last n answers (per-user) from Quora; requires nodejs"
|
23
23
|
CONTENT_TYPE = 'html'
|
24
24
|
end
|
25
25
|
|
@@ -34,8 +34,11 @@ class Page < Bwkfanboy::Parse
|
|
34
34
|
doc.xpath("//script").each {|i|
|
35
35
|
js = i.text
|
36
36
|
if js.include?('"epoch_us"')
|
37
|
+
if Bwkfanboy::Utils.cfg[:verbose] >= 3
|
38
|
+
File.open("#{File.basename(__FILE__)}-epoch.js.raw", "w+") {|i| i.puts js }
|
39
|
+
end
|
37
40
|
r = Bwkfanboy::Utils.cmd_run("echo '#{js}' | #{File.dirname(__FILE__)}/quora.js")
|
38
|
-
fail
|
41
|
+
fail "evaluation in nodejs failed: #{r[1]}" if r[0] != 0
|
39
42
|
tstp = JSON.parse(r[2])
|
40
43
|
break
|
41
44
|
end
|
@@ -51,6 +54,7 @@ class Page < Bwkfanboy::Parse
|
|
51
54
|
l = clean(i.xpath("h2//a")[0].attributes['href'].value())
|
52
55
|
next unless tstp.key?(l) # ignore answers without timestamps
|
53
56
|
u = date(Time.at(tstp[l]/1000/1000).to_s)
|
57
|
+
# u = DateTime.new.iso8601
|
54
58
|
l = url + l + '/answer/' + profile
|
55
59
|
|
56
60
|
c = i.xpath("../div[@class='hidden expanded_q_text']/div").inner_html(encoding: Meta::ENC)
|
data/lib/bwkfanboy/utils.rb
CHANGED
@@ -7,7 +7,7 @@ require 'active_support/core_ext/module/attribute_accessors'
|
|
7
7
|
module Bwkfanboy
|
8
8
|
module Meta
|
9
9
|
NAME = 'bwkfanboy'
|
10
|
-
VERSION = '0.1.
|
10
|
+
VERSION = '0.1.2'
|
11
11
|
USER_AGENT = "#{NAME}/#{VERSION} (#{RUBY_PLATFORM}; N; #{Encoding.default_external.name}; #{RUBY_ENGINE}; rv:#{RUBY_VERSION}.#{RUBY_PATCHLEVEL})"
|
12
12
|
PLUGIN_CLASS = 'Page'
|
13
13
|
DIR_TMP = "/tmp/#{Meta::NAME}/#{ENV['USER']}"
|
@@ -125,9 +125,12 @@ module Bwkfanboy
|
|
125
125
|
|
126
126
|
# used in CGI and WEBrick examples
|
127
127
|
def self.cmd_run(cmd)
|
128
|
-
|
129
|
-
|
130
|
-
|
128
|
+
so = sr = ''
|
129
|
+
status = Open4::popen4(cmd) { |pid, stdin, stdout, stderr|
|
130
|
+
so = stdout.read
|
131
|
+
sr = stderr.read
|
132
|
+
}
|
133
|
+
[status.exitstatus, sr, so]
|
131
134
|
end
|
132
135
|
|
133
136
|
def self.gem_dir_system
|