bwkfanboy 1.3.2 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +1 -1
- data/Rakefile +1 -2
- data/bin/bwkfanboy +1 -0
- data/doc/NEWS.rdoc +10 -0
- data/doc/README.rdoc +1 -1
- data/lib/bwkfanboy/meta.rb +1 -1
- data/lib/bwkfanboy/plugins/inc.rb +37 -0
- data/test/semis/inc.html +1067 -0
- data/test/semis/links.txt +1 -1
- data/test/test_server.rb +13 -12
- metadata +64 -99
- data/lib/bwkfanboy/plugins/quora.js +0 -111
- data/lib/bwkfanboy/plugins/quora.rb +0 -81
- data/test/semis/quora.html +0 -45
data/test/semis/links.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1
1
|
# output_file URL
|
2
|
-
|
2
|
+
inc.html http://www.inc.com/author/jason-fried
|
3
3
|
bwk.html http://www.dailyprincetonian.com/advanced_search/?author=Brian+Kernighan
|
4
4
|
econlib.html http://www.econlib.org/cgi-bin/searcharticles.pl?sortby=DD&query=ha*
|
data/test/test_server.rb
CHANGED
@@ -25,19 +25,20 @@ class TestServer < MiniTest::Unit::TestCase
|
|
25
25
|
assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}") }
|
26
26
|
assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=INVALID") }
|
27
27
|
# 'o' is missing
|
28
|
-
assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=quora") }
|
28
|
+
assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=inc") }
|
29
29
|
end
|
30
30
|
|
31
|
-
def
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
31
|
+
def test_right_plugins
|
32
|
+
plugins = {
|
33
|
+
'bwk' => '64186fac2c52e5a969ad5675b9cc95ed',
|
34
|
+
'econlib' => '11f6114a9ab54d6ec67a26cbd76f5260',
|
35
|
+
'inc' => '13dae248c81dd6407ff327dd5575f8b5',
|
36
|
+
}
|
37
|
+
plugins.each {|k,v|
|
38
|
+
r = ''
|
39
|
+
open("http://#{ADDR}:#{@port}/?p=#{k}&o=foo") { |f| r = f.read }
|
40
|
+
# wget -q -O - '127.0.0.1:9042/?p=inc&o=foo' | md5
|
41
|
+
assert_equal(v, Digest::MD5.hexdigest(r))
|
42
|
+
}
|
42
43
|
end
|
43
44
|
end
|
metadata
CHANGED
@@ -1,108 +1,82 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bwkfanboy
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 1
|
7
|
-
- 3
|
8
|
-
- 2
|
9
|
-
version: 1.3.2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.4.0
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Alexander Gromnitsky
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-04-06 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: open4
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &338037960 !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
segments:
|
29
|
-
- 1
|
30
|
-
- 0
|
31
|
-
- 1
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
32
21
|
version: 1.0.1
|
33
22
|
type: :runtime
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: activesupport
|
37
23
|
prerelease: false
|
38
|
-
|
24
|
+
version_requirements: *338037960
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: activesupport
|
27
|
+
requirement: &338035630 !ruby/object:Gem::Requirement
|
39
28
|
none: false
|
40
|
-
requirements:
|
41
|
-
- -
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
segments:
|
44
|
-
- 3
|
45
|
-
- 0
|
46
|
-
- 5
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
47
32
|
version: 3.0.5
|
48
33
|
type: :runtime
|
49
|
-
version_requirements: *id002
|
50
|
-
- !ruby/object:Gem::Dependency
|
51
|
-
name: nokogiri
|
52
34
|
prerelease: false
|
53
|
-
|
35
|
+
version_requirements: *338035630
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: nokogiri
|
38
|
+
requirement: &338034330 !ruby/object:Gem::Requirement
|
54
39
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
segments:
|
59
|
-
- 1
|
60
|
-
- 4
|
61
|
-
- 4
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
62
43
|
version: 1.4.4
|
63
44
|
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
|
-
- !ruby/object:Gem::Dependency
|
66
|
-
name: jsonschema
|
67
45
|
prerelease: false
|
68
|
-
|
46
|
+
version_requirements: *338034330
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: jsonschema
|
49
|
+
requirement: &338032480 !ruby/object:Gem::Requirement
|
69
50
|
none: false
|
70
|
-
requirements:
|
71
|
-
- -
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
segments:
|
74
|
-
- 2
|
75
|
-
- 0
|
76
|
-
- 1
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
77
54
|
version: 2.0.1
|
78
55
|
type: :runtime
|
79
|
-
version_requirements: *id004
|
80
|
-
- !ruby/object:Gem::Dependency
|
81
|
-
name: git
|
82
56
|
prerelease: false
|
83
|
-
|
57
|
+
version_requirements: *338032480
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: git
|
60
|
+
requirement: &338031220 !ruby/object:Gem::Requirement
|
84
61
|
none: false
|
85
|
-
requirements:
|
86
|
-
- -
|
87
|
-
- !ruby/object:Gem::Version
|
88
|
-
segments:
|
89
|
-
- 1
|
90
|
-
- 2
|
91
|
-
- 5
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
92
65
|
version: 1.2.5
|
93
66
|
type: :development
|
94
|
-
|
95
|
-
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *338031220
|
69
|
+
description: bwkfanboy is a converter from a raw HTML to an Atom feed. You can use
|
70
|
+
it to watch sites that do not provide its own feed.
|
96
71
|
email: alexander.gromnitsky@gmail.com
|
97
|
-
executables:
|
72
|
+
executables:
|
98
73
|
- bwkfanboy_generate
|
99
74
|
- bwkfanboy_parse
|
100
75
|
- bwkfanboy
|
101
76
|
- bwkfanboy_server
|
102
77
|
- bwkfanboy_fetch
|
103
78
|
extensions: []
|
104
|
-
|
105
|
-
extra_rdoc_files:
|
79
|
+
extra_rdoc_files:
|
106
80
|
- doc/NEWS.rdoc
|
107
81
|
- doc/plugin.rdoc
|
108
82
|
- doc/bwkfanboy_fetch.rdoc
|
@@ -110,7 +84,7 @@ extra_rdoc_files:
|
|
110
84
|
- doc/bwkfanboy_parse.rdoc
|
111
85
|
- doc/bwkfanboy_server.rdoc
|
112
86
|
- doc/README.rdoc
|
113
|
-
files:
|
87
|
+
files:
|
114
88
|
- README.rdoc
|
115
89
|
- Rakefile
|
116
90
|
- bin/bwkfanboy
|
@@ -137,8 +111,7 @@ files:
|
|
137
111
|
- lib/bwkfanboy/plugins/bwk.rb
|
138
112
|
- lib/bwkfanboy/plugins/econlib.rb
|
139
113
|
- lib/bwkfanboy/plugins/freebsd-ports-update.rb
|
140
|
-
- lib/bwkfanboy/plugins/quora.js
|
141
|
-
- lib/bwkfanboy/plugins/quora.rb
|
114
|
+
- lib/bwkfanboy/plugins/inc.rb
|
142
115
|
- lib/bwkfanboy/schema.js
|
143
116
|
- lib/bwkfanboy/utils.rb
|
144
117
|
- test/helper.rb
|
@@ -149,52 +122,44 @@ files:
|
|
149
122
|
- test/semis/bwk.html
|
150
123
|
- test/semis/bwk.json
|
151
124
|
- test/semis/econlib.html
|
125
|
+
- test/semis/inc.html
|
152
126
|
- test/semis/links.txt
|
153
|
-
- test/semis/quora.html
|
154
127
|
- test/test_fetch.rb
|
155
128
|
- test/test_generate.rb
|
156
129
|
- test/test_parse.rb
|
157
130
|
- test/test_server.rb
|
158
131
|
- test/xml-clean.sh
|
159
132
|
- web/bwkfanboy.cgi
|
160
|
-
has_rdoc: true
|
161
133
|
homepage: http://github.com/gromnitsky/bwkfanboy
|
162
134
|
licenses: []
|
163
|
-
|
164
135
|
post_install_message:
|
165
|
-
rdoc_options:
|
136
|
+
rdoc_options:
|
166
137
|
- -m
|
167
138
|
- doc/README.rdoc
|
168
139
|
- -x
|
169
140
|
- plugins
|
170
|
-
require_paths:
|
141
|
+
require_paths:
|
171
142
|
- lib
|
172
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
143
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
173
144
|
none: false
|
174
|
-
requirements:
|
175
|
-
- -
|
176
|
-
- !ruby/object:Gem::Version
|
177
|
-
segments:
|
178
|
-
- 1
|
179
|
-
- 9
|
180
|
-
- 2
|
145
|
+
requirements:
|
146
|
+
- - ! '>='
|
147
|
+
- !ruby/object:Gem::Version
|
181
148
|
version: 1.9.2
|
182
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
183
150
|
none: false
|
184
|
-
requirements:
|
185
|
-
- -
|
186
|
-
- !ruby/object:Gem::Version
|
187
|
-
|
188
|
-
- 0
|
189
|
-
version: "0"
|
151
|
+
requirements:
|
152
|
+
- - ! '>='
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
190
155
|
requirements: []
|
191
|
-
|
192
156
|
rubyforge_project:
|
193
|
-
rubygems_version: 1.
|
157
|
+
rubygems_version: 1.7.2
|
194
158
|
signing_key:
|
195
159
|
specification_version: 3
|
196
|
-
summary:
|
197
|
-
|
160
|
+
summary: bwkfanboy is a converter from a raw HTML to an Atom feed. You can use it
|
161
|
+
to watch sites that do not provide its own feed.
|
162
|
+
test_files:
|
198
163
|
- test/test_fetch.rb
|
199
164
|
- test/test_parse.rb
|
200
165
|
- test/test_server.rb
|
@@ -1,111 +0,0 @@
|
|
1
|
-
#!/usr/bin/env node
|
2
|
-
|
3
|
-
/*
|
4
|
-
A companion to 'quora.rb' plugin.
|
5
|
-
*/
|
6
|
-
|
7
|
-
var u = require('util')
|
8
|
-
|
9
|
-
function get_timestamps(data) {
|
10
|
-
var e = {}
|
11
|
-
var link = null
|
12
|
-
for (i in data) {
|
13
|
-
var f = data[i][0]
|
14
|
-
|
15
|
-
if (f == 'FeedStoryItem') {
|
16
|
-
link = data[i][3]['q_path']
|
17
|
-
// u.puts(link)
|
18
|
-
}
|
19
|
-
|
20
|
-
if (f == 'DateTimeComponent') {
|
21
|
-
e[link] = data[i][3]['epoch_us']
|
22
|
-
}
|
23
|
-
}
|
24
|
-
return e
|
25
|
-
}
|
26
|
-
|
27
|
-
function collect_stdin(callback) {
|
28
|
-
var input = '';
|
29
|
-
process.stdin.resume();
|
30
|
-
process.stdin.setEncoding('ascii');
|
31
|
-
process.stdin.on('data', function (chunk) {
|
32
|
-
input += chunk
|
33
|
-
});
|
34
|
-
process.stdin.on('end', function () {
|
35
|
-
callback(input);
|
36
|
-
});
|
37
|
-
};
|
38
|
-
|
39
|
-
function prepare4eval(body) {
|
40
|
-
var head = "function W2() {}\n" +
|
41
|
-
"W2.addComponentMetadata = function(foo) {}\n" +
|
42
|
-
"W2.registerComponents = function(foo) {}\n" +
|
43
|
-
"W2._ConnectionWarningCls = function(args) {}\n" +
|
44
|
-
"W2._LoadingCls = function(args) {}\n" +
|
45
|
-
"W2._InteractionModeCls = function(args) {}\n" +
|
46
|
-
"W2.onLoad = function(args) {}\n" +
|
47
|
-
"\n" +
|
48
|
-
"document = 'foo'\n" +
|
49
|
-
"$ = function(foo) { return $ }\n" +
|
50
|
-
"$.ready = function(foo) {}\n" +
|
51
|
-
"\n" +
|
52
|
-
"arr = function(args) {\n" +
|
53
|
-
" a = []\n" +
|
54
|
-
" a.push(args.callee.name)\n" +
|
55
|
-
" for(var i = 0; i < args.length; i++) { a.push(args[i]) }\n" +
|
56
|
-
" return a\n" +
|
57
|
-
"}\n" +
|
58
|
-
"\n" +
|
59
|
-
"function LoginButton(args) { return arr(arguments) }\n" +
|
60
|
-
"function ContextNavigator(args) { return arr(arguments) }\n" +
|
61
|
-
"function TypeaheadContextText(args) { return arr(arguments) }\n" +
|
62
|
-
"function TypeaheadResults(args) { return arr(arguments) }\n" +
|
63
|
-
"function QuestionAddLink(args) { return arr(arguments) }\n" +
|
64
|
-
"function TitleNotificationsCount(args) { return arr(arguments) }\n" +
|
65
|
-
"function TextareaAutoSize(args) { return arr(arguments) }\n" +
|
66
|
-
"function PMsgContainer(args) { return arr(arguments) }\n" +
|
67
|
-
"function UserAdminMenuLink(args) { return arr(arguments) }\n" +
|
68
|
-
"function PagedList(args) { return arr(arguments) }\n" +
|
69
|
-
"function FeedStoryItem(args) { return arr(arguments) }\n" +
|
70
|
-
"function QuestionLink(args) { return arr(arguments) }\n" +
|
71
|
-
"function QuestionBestSourceIcon(args) { return arr(arguments) }\n" +
|
72
|
-
"function AnswerVotingButtons(args) { return arr(arguments) }\n" +
|
73
|
-
"function TruncatePhraseList(args) { return arr(arguments) }\n" +
|
74
|
-
"function CommentLink(args) { return arr(arguments) }\n" +
|
75
|
-
"function DateTimeComponent(args) { return arr(arguments) }\n" +
|
76
|
-
"function AnswerComments(args) { return arr(arguments) }\n" +
|
77
|
-
"function Comment(args) { return arr(arguments) }\n" +
|
78
|
-
"function FeedAnswerItem(args) { return arr(arguments) }\n" +
|
79
|
-
"function HoverMenu(args) { return arr(arguments) }\n" +
|
80
|
-
"function ExpandableQText(args) { return arr(arguments) }\n" +
|
81
|
-
"function TruncatedQText(args) { return arr(arguments) }\n" +
|
82
|
-
"function UseMobileSite(args) { return arr(arguments) }\n" +
|
83
|
-
"function LoginSignal(args) { return arr(arguments) }\n" +
|
84
|
-
"function LiveLogin(args) { return arr(arguments) }\n" +
|
85
|
-
"function PresencePageMonitor(args) { return arr(arguments) }\n" +
|
86
|
-
"function UserSig(args) { return arr(arguments) }\n" +
|
87
|
-
"function HeaderLogo(args) { return arr(arguments) }\n" +
|
88
|
-
"function NavElement(args) { return arr(arguments) }\n" +
|
89
|
-
"function UserFollowLink(args) { return arr(arguments) }\n" +
|
90
|
-
"function FlashClient(args) { return arr(arguments) }\n" +
|
91
|
-
"function AddQuestionLink(args) { return arr(arguments) }\n" +
|
92
|
-
"function QTextImageEnlarger(args) { return arr(arguments) }\n" +
|
93
|
-
"function NavigatorResults(args) { return arr(arguments) }\n" +
|
94
|
-
"function RateAnswerApproveButton(args) { return arr(arguments) }\n" +
|
95
|
-
"function AnswerVoterNamesExpandable(args) { return arr(arguments) }\n" +
|
96
|
-
"function PrefetchManager(args) { return arr(arguments) }\n" +
|
97
|
-
"function ResponseRecorder(args) { return arr(arguments) }\n" +
|
98
|
-
'';
|
99
|
-
var tail = "\n_components;\n";
|
100
|
-
|
101
|
-
return head + body + tail;
|
102
|
-
}
|
103
|
-
|
104
|
-
collect_stdin(function(t) {
|
105
|
-
// u.puts(t)
|
106
|
-
|
107
|
-
var script = process.binding('evals').Script
|
108
|
-
var code = script.runInThisContext(prepare4eval(t))
|
109
|
-
// u.puts(u.inspect(code, false, null))
|
110
|
-
u.puts(JSON.stringify(get_timestamps(code), null, ' '))
|
111
|
-
})
|
@@ -1,81 +0,0 @@
|
|
1
|
-
# Quora uses JavaScript to dynamically insert timestamps for the
|
2
|
-
# questions/comments. To combat this, we cut the JS from the page,
|
3
|
-
# evaluate it in nodejs and construct a hash with 'article-name =>
|
4
|
-
# timestamp' pairs.
|
5
|
-
#
|
6
|
-
# Requires:
|
7
|
-
#
|
8
|
-
# - 'quora.js' script in directory with the plugin;
|
9
|
-
# - 1 option: a Quora user's name, for example 'Brandon-Smietana'
|
10
|
-
|
11
|
-
require 'nokogiri'
|
12
|
-
|
13
|
-
class Page < Bwkfanboy::Parse
|
14
|
-
module Meta
|
15
|
-
URI = 'http://www.quora.com/#{opt[0]}/answers'
|
16
|
-
URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/quora.html'
|
17
|
-
ENC = 'UTF-8'
|
18
|
-
VERSION = 13
|
19
|
-
COPYRIGHT = "See bwkfanboy's LICENSE file"
|
20
|
-
TITLE = "Last n answers (per-user) from Quora; requires nodejs 0.3.7+"
|
21
|
-
CONTENT_TYPE = 'html'
|
22
|
-
end
|
23
|
-
|
24
|
-
def myparse(stream)
|
25
|
-
profile = opt[0] # for example, 'Brandon-Smietana'
|
26
|
-
|
27
|
-
# read stdin
|
28
|
-
doc = Nokogiri::HTML(stream, nil, Meta::ENC)
|
29
|
-
|
30
|
-
# extract & evaluate JavaScript into tstp
|
31
|
-
tstp = nil
|
32
|
-
doc.xpath("//script").each {|i|
|
33
|
-
js = i.text
|
34
|
-
if js.include?('"epoch_us"')
|
35
|
-
if Bwkfanboy::Utils.cfg[:verbose] >= 3
|
36
|
-
File.open("#{File.basename(__FILE__)}-epoch.js.raw", "w+") {|i| i.puts js }
|
37
|
-
end
|
38
|
-
# open a pipe, write js to it & read a JSON result
|
39
|
-
r = ''
|
40
|
-
begin
|
41
|
-
pipe = IO.popen("#{File.dirname(__FILE__)}/quora.js", 'w+')
|
42
|
-
pipe.puts js
|
43
|
-
pipe.close_write
|
44
|
-
while line = pipe.gets
|
45
|
-
r << line
|
46
|
-
end
|
47
|
-
pipe.close
|
48
|
-
rescue
|
49
|
-
fail "evaluation in nodejs failed: #{$!}"
|
50
|
-
end
|
51
|
-
# r = Bwkfanboy::Utils.cmd_run("echo '#{js}' | #{File.dirname(__FILE__)}/quora.js")
|
52
|
-
# fail "evaluation in nodejs failed: #{r[1]}" if r[0] != 0
|
53
|
-
tstp = JSON.parse r
|
54
|
-
break
|
55
|
-
end
|
56
|
-
}
|
57
|
-
|
58
|
-
# xpath movements
|
59
|
-
url = 'http://www.quora.com'
|
60
|
-
a = clean(doc.xpath("//h1").text())
|
61
|
-
|
62
|
-
doc.xpath("//div[@class='feed_item_question']").each {|i|
|
63
|
-
t = clean(i.xpath("h2").text())
|
64
|
-
|
65
|
-
l = clean(i.xpath("h2//a")[0].attributes['href'].value())
|
66
|
-
next unless tstp.key?(l) # ignore answers without timestamps
|
67
|
-
u = date(Time.at(tstp[l]/1000/1000).to_s)
|
68
|
-
# u = DateTime.new.iso8601
|
69
|
-
l = url + l + '/answer/' + profile
|
70
|
-
|
71
|
-
c = i.xpath("../div[@class='hidden expanded_q_text']/div").inner_html(encoding: Meta::ENC)
|
72
|
-
if c == ''
|
73
|
-
c = i.xpath("../../div/div/div/div[3]")
|
74
|
-
c.xpath("div").each {|j| j.remove() }
|
75
|
-
c = c.inner_html(encoding: Meta::ENC)
|
76
|
-
end
|
77
|
-
|
78
|
-
self << { title: t, link: l, updated: u, author: a, content: c }
|
79
|
-
}
|
80
|
-
end
|
81
|
-
end
|