bwkfanboy 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +1 -1
- data/Rakefile +1 -2
- data/bin/bwkfanboy +1 -0
- data/doc/NEWS.rdoc +10 -0
- data/doc/README.rdoc +1 -1
- data/lib/bwkfanboy/meta.rb +1 -1
- data/lib/bwkfanboy/plugins/inc.rb +37 -0
- data/test/semis/inc.html +1067 -0
- data/test/semis/links.txt +1 -1
- data/test/test_server.rb +13 -12
- metadata +64 -99
- data/lib/bwkfanboy/plugins/quora.js +0 -111
- data/lib/bwkfanboy/plugins/quora.rb +0 -81
- data/test/semis/quora.html +0 -45
data/test/semis/links.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1
1
|
# output_file URL
|
2
|
-
|
2
|
+
inc.html http://www.inc.com/author/jason-fried
|
3
3
|
bwk.html http://www.dailyprincetonian.com/advanced_search/?author=Brian+Kernighan
|
4
4
|
econlib.html http://www.econlib.org/cgi-bin/searcharticles.pl?sortby=DD&query=ha*
|
data/test/test_server.rb
CHANGED
@@ -25,19 +25,20 @@ class TestServer < MiniTest::Unit::TestCase
|
|
25
25
|
assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}") }
|
26
26
|
assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=INVALID") }
|
27
27
|
# 'o' is missing
|
28
|
-
assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=
|
28
|
+
assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=inc") }
|
29
29
|
end
|
30
30
|
|
31
|
-
def
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
31
|
+
def test_right_plugins
|
32
|
+
plugins = {
|
33
|
+
'bwk' => '64186fac2c52e5a969ad5675b9cc95ed',
|
34
|
+
'econlib' => '11f6114a9ab54d6ec67a26cbd76f5260',
|
35
|
+
'inc' => '13dae248c81dd6407ff327dd5575f8b5',
|
36
|
+
}
|
37
|
+
plugins.each {|k,v|
|
38
|
+
r = ''
|
39
|
+
open("http://#{ADDR}:#{@port}/?p=#{k}&o=foo") { |f| r = f.read }
|
40
|
+
# wget -q -O - '127.0.0.1:9042/?p=inc&o=foo' | md5
|
41
|
+
assert_equal(v, Digest::MD5.hexdigest(r))
|
42
|
+
}
|
42
43
|
end
|
43
44
|
end
|
metadata
CHANGED
@@ -1,108 +1,82 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bwkfanboy
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 1
|
7
|
-
- 3
|
8
|
-
- 2
|
9
|
-
version: 1.3.2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.4.0
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Alexander Gromnitsky
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-04-06 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: open4
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &338037960 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 1
|
30
|
-
- 0
|
31
|
-
- 1
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
32
21
|
version: 1.0.1
|
33
22
|
type: :runtime
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: activesupport
|
37
23
|
prerelease: false
|
38
|
-
|
24
|
+
version_requirements: *338037960
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: activesupport
|
27
|
+
requirement: &338035630 !ruby/object:Gem::Requirement
|
39
28
|
none: false
|
40
|
-
requirements:
|
41
|
-
- -
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
segments:
|
44
|
-
- 3
|
45
|
-
- 0
|
46
|
-
- 5
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
47
32
|
version: 3.0.5
|
48
33
|
type: :runtime
|
49
|
-
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: nokogiri
|
52
34
|
prerelease: false
|
53
|
-
|
35
|
+
version_requirements: *338035630
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: nokogiri
|
38
|
+
requirement: &338034330 !ruby/object:Gem::Requirement
|
54
39
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
segments:
|
59
|
-
- 1
|
60
|
-
- 4
|
61
|
-
- 4
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
62
43
|
version: 1.4.4
|
63
44
|
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
|
-
- !ruby/object:Gem::Dependency
|
66
|
-
name: jsonschema
|
67
45
|
prerelease: false
|
68
|
-
|
46
|
+
version_requirements: *338034330
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: jsonschema
|
49
|
+
requirement: &338032480 !ruby/object:Gem::Requirement
|
69
50
|
none: false
|
70
|
-
requirements:
|
71
|
-
- -
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
segments:
|
74
|
-
- 2
|
75
|
-
- 0
|
76
|
-
- 1
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
77
54
|
version: 2.0.1
|
78
55
|
type: :runtime
|
79
|
-
version_requirements: *id004
|
80
|
-
- !ruby/object:Gem::Dependency
|
81
|
-
name: git
|
82
56
|
prerelease: false
|
83
|
-
|
57
|
+
version_requirements: *338032480
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: git
|
60
|
+
requirement: &338031220 !ruby/object:Gem::Requirement
|
84
61
|
none: false
|
85
|
-
requirements:
|
86
|
-
- -
|
87
|
-
- !ruby/object:Gem::Version
|
88
|
-
segments:
|
89
|
-
- 1
|
90
|
-
- 2
|
91
|
-
- 5
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
92
65
|
version: 1.2.5
|
93
66
|
type: :development
|
94
|
-
|
95
|
-
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *338031220
|
69
|
+
description: bwkfanboy is a converter from a raw HTML to an Atom feed. You can use
|
70
|
+
it to watch sites that do not provide its own feed.
|
96
71
|
email: alexander.gromnitsky@gmail.com
|
97
|
-
executables:
|
72
|
+
executables:
|
98
73
|
- bwkfanboy_generate
|
99
74
|
- bwkfanboy_parse
|
100
75
|
- bwkfanboy
|
101
76
|
- bwkfanboy_server
|
102
77
|
- bwkfanboy_fetch
|
103
78
|
extensions: []
|
104
|
-
|
105
|
-
extra_rdoc_files:
|
79
|
+
extra_rdoc_files:
|
106
80
|
- doc/NEWS.rdoc
|
107
81
|
- doc/plugin.rdoc
|
108
82
|
- doc/bwkfanboy_fetch.rdoc
|
@@ -110,7 +84,7 @@ extra_rdoc_files:
|
|
110
84
|
- doc/bwkfanboy_parse.rdoc
|
111
85
|
- doc/bwkfanboy_server.rdoc
|
112
86
|
- doc/README.rdoc
|
113
|
-
files:
|
87
|
+
files:
|
114
88
|
- README.rdoc
|
115
89
|
- Rakefile
|
116
90
|
- bin/bwkfanboy
|
@@ -137,8 +111,7 @@ files:
|
|
137
111
|
- lib/bwkfanboy/plugins/bwk.rb
|
138
112
|
- lib/bwkfanboy/plugins/econlib.rb
|
139
113
|
- lib/bwkfanboy/plugins/freebsd-ports-update.rb
|
140
|
-
- lib/bwkfanboy/plugins/
|
141
|
-
- lib/bwkfanboy/plugins/quora.rb
|
114
|
+
- lib/bwkfanboy/plugins/inc.rb
|
142
115
|
- lib/bwkfanboy/schema.js
|
143
116
|
- lib/bwkfanboy/utils.rb
|
144
117
|
- test/helper.rb
|
@@ -149,52 +122,44 @@ files:
|
|
149
122
|
- test/semis/bwk.html
|
150
123
|
- test/semis/bwk.json
|
151
124
|
- test/semis/econlib.html
|
125
|
+
- test/semis/inc.html
|
152
126
|
- test/semis/links.txt
|
153
|
-
- test/semis/quora.html
|
154
127
|
- test/test_fetch.rb
|
155
128
|
- test/test_generate.rb
|
156
129
|
- test/test_parse.rb
|
157
130
|
- test/test_server.rb
|
158
131
|
- test/xml-clean.sh
|
159
132
|
- web/bwkfanboy.cgi
|
160
|
-
has_rdoc: true
|
161
133
|
homepage: http://github.com/gromnitsky/bwkfanboy
|
162
134
|
licenses: []
|
163
|
-
|
164
135
|
post_install_message:
|
165
|
-
rdoc_options:
|
136
|
+
rdoc_options:
|
166
137
|
- -m
|
167
138
|
- doc/README.rdoc
|
168
139
|
- -x
|
169
140
|
- plugins
|
170
|
-
require_paths:
|
141
|
+
require_paths:
|
171
142
|
- lib
|
172
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
143
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
173
144
|
none: false
|
174
|
-
requirements:
|
175
|
-
- -
|
176
|
-
- !ruby/object:Gem::Version
|
177
|
-
segments:
|
178
|
-
- 1
|
179
|
-
- 9
|
180
|
-
- 2
|
145
|
+
requirements:
|
146
|
+
- - ! '>='
|
147
|
+
- !ruby/object:Gem::Version
|
181
148
|
version: 1.9.2
|
182
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
183
150
|
none: false
|
184
|
-
requirements:
|
185
|
-
- -
|
186
|
-
- !ruby/object:Gem::Version
|
187
|
-
|
188
|
-
- 0
|
189
|
-
version: "0"
|
151
|
+
requirements:
|
152
|
+
- - ! '>='
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
190
155
|
requirements: []
|
191
|
-
|
192
156
|
rubyforge_project:
|
193
|
-
rubygems_version: 1.
|
157
|
+
rubygems_version: 1.7.2
|
194
158
|
signing_key:
|
195
159
|
specification_version: 3
|
196
|
-
summary:
|
197
|
-
|
160
|
+
summary: bwkfanboy is a converter from a raw HTML to an Atom feed. You can use it
|
161
|
+
to watch sites that do not provide its own feed.
|
162
|
+
test_files:
|
198
163
|
- test/test_fetch.rb
|
199
164
|
- test/test_parse.rb
|
200
165
|
- test/test_server.rb
|
@@ -1,111 +0,0 @@
|
|
1
|
-
#!/usr/bin/env node
|
2
|
-
|
3
|
-
/*
|
4
|
-
A companion to 'quora.rb' plugin.
|
5
|
-
*/
|
6
|
-
|
7
|
-
var u = require('util')
|
8
|
-
|
9
|
-
function get_timestamps(data) {
|
10
|
-
var e = {}
|
11
|
-
var link = null
|
12
|
-
for (i in data) {
|
13
|
-
var f = data[i][0]
|
14
|
-
|
15
|
-
if (f == 'FeedStoryItem') {
|
16
|
-
link = data[i][3]['q_path']
|
17
|
-
// u.puts(link)
|
18
|
-
}
|
19
|
-
|
20
|
-
if (f == 'DateTimeComponent') {
|
21
|
-
e[link] = data[i][3]['epoch_us']
|
22
|
-
}
|
23
|
-
}
|
24
|
-
return e
|
25
|
-
}
|
26
|
-
|
27
|
-
function collect_stdin(callback) {
|
28
|
-
var input = '';
|
29
|
-
process.stdin.resume();
|
30
|
-
process.stdin.setEncoding('ascii');
|
31
|
-
process.stdin.on('data', function (chunk) {
|
32
|
-
input += chunk
|
33
|
-
});
|
34
|
-
process.stdin.on('end', function () {
|
35
|
-
callback(input);
|
36
|
-
});
|
37
|
-
};
|
38
|
-
|
39
|
-
function prepare4eval(body) {
|
40
|
-
var head = "function W2() {}\n" +
|
41
|
-
"W2.addComponentMetadata = function(foo) {}\n" +
|
42
|
-
"W2.registerComponents = function(foo) {}\n" +
|
43
|
-
"W2._ConnectionWarningCls = function(args) {}\n" +
|
44
|
-
"W2._LoadingCls = function(args) {}\n" +
|
45
|
-
"W2._InteractionModeCls = function(args) {}\n" +
|
46
|
-
"W2.onLoad = function(args) {}\n" +
|
47
|
-
"\n" +
|
48
|
-
"document = 'foo'\n" +
|
49
|
-
"$ = function(foo) { return $ }\n" +
|
50
|
-
"$.ready = function(foo) {}\n" +
|
51
|
-
"\n" +
|
52
|
-
"arr = function(args) {\n" +
|
53
|
-
" a = []\n" +
|
54
|
-
" a.push(args.callee.name)\n" +
|
55
|
-
" for(var i = 0; i < args.length; i++) { a.push(args[i]) }\n" +
|
56
|
-
" return a\n" +
|
57
|
-
"}\n" +
|
58
|
-
"\n" +
|
59
|
-
"function LoginButton(args) { return arr(arguments) }\n" +
|
60
|
-
"function ContextNavigator(args) { return arr(arguments) }\n" +
|
61
|
-
"function TypeaheadContextText(args) { return arr(arguments) }\n" +
|
62
|
-
"function TypeaheadResults(args) { return arr(arguments) }\n" +
|
63
|
-
"function QuestionAddLink(args) { return arr(arguments) }\n" +
|
64
|
-
"function TitleNotificationsCount(args) { return arr(arguments) }\n" +
|
65
|
-
"function TextareaAutoSize(args) { return arr(arguments) }\n" +
|
66
|
-
"function PMsgContainer(args) { return arr(arguments) }\n" +
|
67
|
-
"function UserAdminMenuLink(args) { return arr(arguments) }\n" +
|
68
|
-
"function PagedList(args) { return arr(arguments) }\n" +
|
69
|
-
"function FeedStoryItem(args) { return arr(arguments) }\n" +
|
70
|
-
"function QuestionLink(args) { return arr(arguments) }\n" +
|
71
|
-
"function QuestionBestSourceIcon(args) { return arr(arguments) }\n" +
|
72
|
-
"function AnswerVotingButtons(args) { return arr(arguments) }\n" +
|
73
|
-
"function TruncatePhraseList(args) { return arr(arguments) }\n" +
|
74
|
-
"function CommentLink(args) { return arr(arguments) }\n" +
|
75
|
-
"function DateTimeComponent(args) { return arr(arguments) }\n" +
|
76
|
-
"function AnswerComments(args) { return arr(arguments) }\n" +
|
77
|
-
"function Comment(args) { return arr(arguments) }\n" +
|
78
|
-
"function FeedAnswerItem(args) { return arr(arguments) }\n" +
|
79
|
-
"function HoverMenu(args) { return arr(arguments) }\n" +
|
80
|
-
"function ExpandableQText(args) { return arr(arguments) }\n" +
|
81
|
-
"function TruncatedQText(args) { return arr(arguments) }\n" +
|
82
|
-
"function UseMobileSite(args) { return arr(arguments) }\n" +
|
83
|
-
"function LoginSignal(args) { return arr(arguments) }\n" +
|
84
|
-
"function LiveLogin(args) { return arr(arguments) }\n" +
|
85
|
-
"function PresencePageMonitor(args) { return arr(arguments) }\n" +
|
86
|
-
"function UserSig(args) { return arr(arguments) }\n" +
|
87
|
-
"function HeaderLogo(args) { return arr(arguments) }\n" +
|
88
|
-
"function NavElement(args) { return arr(arguments) }\n" +
|
89
|
-
"function UserFollowLink(args) { return arr(arguments) }\n" +
|
90
|
-
"function FlashClient(args) { return arr(arguments) }\n" +
|
91
|
-
"function AddQuestionLink(args) { return arr(arguments) }\n" +
|
92
|
-
"function QTextImageEnlarger(args) { return arr(arguments) }\n" +
|
93
|
-
"function NavigatorResults(args) { return arr(arguments) }\n" +
|
94
|
-
"function RateAnswerApproveButton(args) { return arr(arguments) }\n" +
|
95
|
-
"function AnswerVoterNamesExpandable(args) { return arr(arguments) }\n" +
|
96
|
-
"function PrefetchManager(args) { return arr(arguments) }\n" +
|
97
|
-
"function ResponseRecorder(args) { return arr(arguments) }\n" +
|
98
|
-
'';
|
99
|
-
var tail = "\n_components;\n";
|
100
|
-
|
101
|
-
return head + body + tail;
|
102
|
-
}
|
103
|
-
|
104
|
-
collect_stdin(function(t) {
|
105
|
-
// u.puts(t)
|
106
|
-
|
107
|
-
var script = process.binding('evals').Script
|
108
|
-
var code = script.runInThisContext(prepare4eval(t))
|
109
|
-
// u.puts(u.inspect(code, false, null))
|
110
|
-
u.puts(JSON.stringify(get_timestamps(code), null, ' '))
|
111
|
-
})
|
@@ -1,81 +0,0 @@
|
|
1
|
-
# Quora uses JavaScript to dynamically insert timestamps for the
|
2
|
-
# questions/comments. To combat this, we cut the JS from the page,
|
3
|
-
# evaluate it in nodejs and construct a hash with 'article-name =>
|
4
|
-
# timestamp' pairs.
|
5
|
-
#
|
6
|
-
# Requires:
|
7
|
-
#
|
8
|
-
# - 'quora.js' script in directory with the plugin;
|
9
|
-
# - 1 option: a Quora user's name, for example 'Brandon-Smietana'
|
10
|
-
|
11
|
-
require 'nokogiri'
|
12
|
-
|
13
|
-
class Page < Bwkfanboy::Parse
|
14
|
-
module Meta
|
15
|
-
URI = 'http://www.quora.com/#{opt[0]}/answers'
|
16
|
-
URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/quora.html'
|
17
|
-
ENC = 'UTF-8'
|
18
|
-
VERSION = 13
|
19
|
-
COPYRIGHT = "See bwkfanboy's LICENSE file"
|
20
|
-
TITLE = "Last n answers (per-user) from Quora; requires nodejs 0.3.7+"
|
21
|
-
CONTENT_TYPE = 'html'
|
22
|
-
end
|
23
|
-
|
24
|
-
def myparse(stream)
|
25
|
-
profile = opt[0] # for example, 'Brandon-Smietana'
|
26
|
-
|
27
|
-
# read stdin
|
28
|
-
doc = Nokogiri::HTML(stream, nil, Meta::ENC)
|
29
|
-
|
30
|
-
# extract & evaluate JavaScript into tstp
|
31
|
-
tstp = nil
|
32
|
-
doc.xpath("//script").each {|i|
|
33
|
-
js = i.text
|
34
|
-
if js.include?('"epoch_us"')
|
35
|
-
if Bwkfanboy::Utils.cfg[:verbose] >= 3
|
36
|
-
File.open("#{File.basename(__FILE__)}-epoch.js.raw", "w+") {|i| i.puts js }
|
37
|
-
end
|
38
|
-
# open a pipe, write js to it & read a JSON result
|
39
|
-
r = ''
|
40
|
-
begin
|
41
|
-
pipe = IO.popen("#{File.dirname(__FILE__)}/quora.js", 'w+')
|
42
|
-
pipe.puts js
|
43
|
-
pipe.close_write
|
44
|
-
while line = pipe.gets
|
45
|
-
r << line
|
46
|
-
end
|
47
|
-
pipe.close
|
48
|
-
rescue
|
49
|
-
fail "evaluation in nodejs failed: #{$!}"
|
50
|
-
end
|
51
|
-
# r = Bwkfanboy::Utils.cmd_run("echo '#{js}' | #{File.dirname(__FILE__)}/quora.js")
|
52
|
-
# fail "evaluation in nodejs failed: #{r[1]}" if r[0] != 0
|
53
|
-
tstp = JSON.parse r
|
54
|
-
break
|
55
|
-
end
|
56
|
-
}
|
57
|
-
|
58
|
-
# xpath movements
|
59
|
-
url = 'http://www.quora.com'
|
60
|
-
a = clean(doc.xpath("//h1").text())
|
61
|
-
|
62
|
-
doc.xpath("//div[@class='feed_item_question']").each {|i|
|
63
|
-
t = clean(i.xpath("h2").text())
|
64
|
-
|
65
|
-
l = clean(i.xpath("h2//a")[0].attributes['href'].value())
|
66
|
-
next unless tstp.key?(l) # ignore answers without timestamps
|
67
|
-
u = date(Time.at(tstp[l]/1000/1000).to_s)
|
68
|
-
# u = DateTime.new.iso8601
|
69
|
-
l = url + l + '/answer/' + profile
|
70
|
-
|
71
|
-
c = i.xpath("../div[@class='hidden expanded_q_text']/div").inner_html(encoding: Meta::ENC)
|
72
|
-
if c == ''
|
73
|
-
c = i.xpath("../../div/div/div/div[3]")
|
74
|
-
c.xpath("div").each {|j| j.remove() }
|
75
|
-
c = c.inner_html(encoding: Meta::ENC)
|
76
|
-
end
|
77
|
-
|
78
|
-
self << { title: t, link: l, updated: u, author: a, content: c }
|
79
|
-
}
|
80
|
-
end
|
81
|
-
end
|