bwkfanboy 1.3.2 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/test/semis/links.txt CHANGED
@@ -1,4 +1,4 @@
1
1
  # output_file URL
2
- quora.html http://www.quora.com/Brandon-Smietana/answers
2
+ inc.html http://www.inc.com/author/jason-fried
3
3
  bwk.html http://www.dailyprincetonian.com/advanced_search/?author=Brian+Kernighan
4
4
  econlib.html http://www.econlib.org/cgi-bin/searcharticles.pl?sortby=DD&query=ha*
data/test/test_server.rb CHANGED
@@ -25,19 +25,20 @@ class TestServer < MiniTest::Unit::TestCase
25
25
  assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}") }
26
26
  assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=INVALID") }
27
27
  # 'o' is missing
28
- assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=quora") }
28
+ assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=inc") }
29
29
  end
30
30
 
31
- def test_right_plugin
32
- r = ''
33
- open("http://#{ADDR}:#{@port}/?p=bwk") { |f| r = f.read }
34
- # wget -q -O - 127.0.0.1:9042/\?p=bwk | md5
35
- assert_equal('64186fac2c52e5a969ad5675b9cc95ed', Digest::MD5.hexdigest(r))
36
-
37
- r = ''
38
- open("http://#{ADDR}:#{@port}/?p=quora&o=foo") { |f| r = f.read }
39
- # bin/bwkfanboy_server -Dd
40
- # wget -q -O - '127.0.0.1:9042/\?p=quora&o=foo' | md5
41
- assert_equal('0f3f6607768392d69d15621eee815ab3', Digest::MD5.hexdigest(r))
31
+ def test_right_plugins
32
+ plugins = {
33
+ 'bwk' => '64186fac2c52e5a969ad5675b9cc95ed',
34
+ 'econlib' => '11f6114a9ab54d6ec67a26cbd76f5260',
35
+ 'inc' => '13dae248c81dd6407ff327dd5575f8b5',
36
+ }
37
+ plugins.each {|k,v|
38
+ r = ''
39
+ open("http://#{ADDR}:#{@port}/?p=#{k}&o=foo") { |f| r = f.read }
40
+ # wget -q -O - '127.0.0.1:9042/?p=inc&o=foo' | md5
41
+ assert_equal(v, Digest::MD5.hexdigest(r))
42
+ }
42
43
  end
43
44
  end
metadata CHANGED
@@ -1,108 +1,82 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bwkfanboy
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 1
7
- - 3
8
- - 2
9
- version: 1.3.2
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.4.0
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Alexander Gromnitsky
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-03-28 00:00:00 +03:00
18
- default_executable: bwkfanboy
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2011-04-06 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: open4
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &338037960 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 1
30
- - 0
31
- - 1
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
32
21
  version: 1.0.1
33
22
  type: :runtime
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: activesupport
37
23
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *338037960
25
+ - !ruby/object:Gem::Dependency
26
+ name: activesupport
27
+ requirement: &338035630 !ruby/object:Gem::Requirement
39
28
  none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- segments:
44
- - 3
45
- - 0
46
- - 5
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
47
32
  version: 3.0.5
48
33
  type: :runtime
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: nokogiri
52
34
  prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *338035630
36
+ - !ruby/object:Gem::Dependency
37
+ name: nokogiri
38
+ requirement: &338034330 !ruby/object:Gem::Requirement
54
39
  none: false
55
- requirements:
56
- - - ">="
57
- - !ruby/object:Gem::Version
58
- segments:
59
- - 1
60
- - 4
61
- - 4
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
62
43
  version: 1.4.4
63
44
  type: :runtime
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
66
- name: jsonschema
67
45
  prerelease: false
68
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *338034330
47
+ - !ruby/object:Gem::Dependency
48
+ name: jsonschema
49
+ requirement: &338032480 !ruby/object:Gem::Requirement
69
50
  none: false
70
- requirements:
71
- - - ">="
72
- - !ruby/object:Gem::Version
73
- segments:
74
- - 2
75
- - 0
76
- - 1
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
77
54
  version: 2.0.1
78
55
  type: :runtime
79
- version_requirements: *id004
80
- - !ruby/object:Gem::Dependency
81
- name: git
82
56
  prerelease: false
83
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *338032480
58
+ - !ruby/object:Gem::Dependency
59
+ name: git
60
+ requirement: &338031220 !ruby/object:Gem::Requirement
84
61
  none: false
85
- requirements:
86
- - - ">="
87
- - !ruby/object:Gem::Version
88
- segments:
89
- - 1
90
- - 2
91
- - 5
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
92
65
  version: 1.2.5
93
66
  type: :development
94
- version_requirements: *id005
95
- description: A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.
67
+ prerelease: false
68
+ version_requirements: *338031220
69
+ description: bwkfanboy is a converter from a raw HTML to an Atom feed. You can use
70
+ it to watch sites that do not provide its own feed.
96
71
  email: alexander.gromnitsky@gmail.com
97
- executables:
72
+ executables:
98
73
  - bwkfanboy_generate
99
74
  - bwkfanboy_parse
100
75
  - bwkfanboy
101
76
  - bwkfanboy_server
102
77
  - bwkfanboy_fetch
103
78
  extensions: []
104
-
105
- extra_rdoc_files:
79
+ extra_rdoc_files:
106
80
  - doc/NEWS.rdoc
107
81
  - doc/plugin.rdoc
108
82
  - doc/bwkfanboy_fetch.rdoc
@@ -110,7 +84,7 @@ extra_rdoc_files:
110
84
  - doc/bwkfanboy_parse.rdoc
111
85
  - doc/bwkfanboy_server.rdoc
112
86
  - doc/README.rdoc
113
- files:
87
+ files:
114
88
  - README.rdoc
115
89
  - Rakefile
116
90
  - bin/bwkfanboy
@@ -137,8 +111,7 @@ files:
137
111
  - lib/bwkfanboy/plugins/bwk.rb
138
112
  - lib/bwkfanboy/plugins/econlib.rb
139
113
  - lib/bwkfanboy/plugins/freebsd-ports-update.rb
140
- - lib/bwkfanboy/plugins/quora.js
141
- - lib/bwkfanboy/plugins/quora.rb
114
+ - lib/bwkfanboy/plugins/inc.rb
142
115
  - lib/bwkfanboy/schema.js
143
116
  - lib/bwkfanboy/utils.rb
144
117
  - test/helper.rb
@@ -149,52 +122,44 @@ files:
149
122
  - test/semis/bwk.html
150
123
  - test/semis/bwk.json
151
124
  - test/semis/econlib.html
125
+ - test/semis/inc.html
152
126
  - test/semis/links.txt
153
- - test/semis/quora.html
154
127
  - test/test_fetch.rb
155
128
  - test/test_generate.rb
156
129
  - test/test_parse.rb
157
130
  - test/test_server.rb
158
131
  - test/xml-clean.sh
159
132
  - web/bwkfanboy.cgi
160
- has_rdoc: true
161
133
  homepage: http://github.com/gromnitsky/bwkfanboy
162
134
  licenses: []
163
-
164
135
  post_install_message:
165
- rdoc_options:
136
+ rdoc_options:
166
137
  - -m
167
138
  - doc/README.rdoc
168
139
  - -x
169
140
  - plugins
170
- require_paths:
141
+ require_paths:
171
142
  - lib
172
- required_ruby_version: !ruby/object:Gem::Requirement
143
+ required_ruby_version: !ruby/object:Gem::Requirement
173
144
  none: false
174
- requirements:
175
- - - ">="
176
- - !ruby/object:Gem::Version
177
- segments:
178
- - 1
179
- - 9
180
- - 2
145
+ requirements:
146
+ - - ! '>='
147
+ - !ruby/object:Gem::Version
181
148
  version: 1.9.2
182
- required_rubygems_version: !ruby/object:Gem::Requirement
149
+ required_rubygems_version: !ruby/object:Gem::Requirement
183
150
  none: false
184
- requirements:
185
- - - ">="
186
- - !ruby/object:Gem::Version
187
- segments:
188
- - 0
189
- version: "0"
151
+ requirements:
152
+ - - ! '>='
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
190
155
  requirements: []
191
-
192
156
  rubyforge_project:
193
- rubygems_version: 1.3.7
157
+ rubygems_version: 1.7.2
194
158
  signing_key:
195
159
  specification_version: 3
196
- summary: A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.
197
- test_files:
160
+ summary: bwkfanboy is a converter from a raw HTML to an Atom feed. You can use it
161
+ to watch sites that do not provide its own feed.
162
+ test_files:
198
163
  - test/test_fetch.rb
199
164
  - test/test_parse.rb
200
165
  - test/test_server.rb
data/lib/bwkfanboy/plugins/quora.js DELETED
@@ -1,111 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- /*
4
- A companion to 'quora.rb' plugin.
5
- */
6
-
7
- var u = require('util')
8
-
9
- function get_timestamps(data) {
10
- var e = {}
11
- var link = null
12
- for (i in data) {
13
- var f = data[i][0]
14
-
15
- if (f == 'FeedStoryItem') {
16
- link = data[i][3]['q_path']
17
- // u.puts(link)
18
- }
19
-
20
- if (f == 'DateTimeComponent') {
21
- e[link] = data[i][3]['epoch_us']
22
- }
23
- }
24
- return e
25
- }
26
-
27
- function collect_stdin(callback) {
28
- var input = '';
29
- process.stdin.resume();
30
- process.stdin.setEncoding('ascii');
31
- process.stdin.on('data', function (chunk) {
32
- input += chunk
33
- });
34
- process.stdin.on('end', function () {
35
- callback(input);
36
- });
37
- };
38
-
39
- function prepare4eval(body) {
40
- var head = "function W2() {}\n" +
41
- "W2.addComponentMetadata = function(foo) {}\n" +
42
- "W2.registerComponents = function(foo) {}\n" +
43
- "W2._ConnectionWarningCls = function(args) {}\n" +
44
- "W2._LoadingCls = function(args) {}\n" +
45
- "W2._InteractionModeCls = function(args) {}\n" +
46
- "W2.onLoad = function(args) {}\n" +
47
- "\n" +
48
- "document = 'foo'\n" +
49
- "$ = function(foo) { return $ }\n" +
50
- "$.ready = function(foo) {}\n" +
51
- "\n" +
52
- "arr = function(args) {\n" +
53
- " a = []\n" +
54
- " a.push(args.callee.name)\n" +
55
- " for(var i = 0; i < args.length; i++) { a.push(args[i]) }\n" +
56
- " return a\n" +
57
- "}\n" +
58
- "\n" +
59
- "function LoginButton(args) { return arr(arguments) }\n" +
60
- "function ContextNavigator(args) { return arr(arguments) }\n" +
61
- "function TypeaheadContextText(args) { return arr(arguments) }\n" +
62
- "function TypeaheadResults(args) { return arr(arguments) }\n" +
63
- "function QuestionAddLink(args) { return arr(arguments) }\n" +
64
- "function TitleNotificationsCount(args) { return arr(arguments) }\n" +
65
- "function TextareaAutoSize(args) { return arr(arguments) }\n" +
66
- "function PMsgContainer(args) { return arr(arguments) }\n" +
67
- "function UserAdminMenuLink(args) { return arr(arguments) }\n" +
68
- "function PagedList(args) { return arr(arguments) }\n" +
69
- "function FeedStoryItem(args) { return arr(arguments) }\n" +
70
- "function QuestionLink(args) { return arr(arguments) }\n" +
71
- "function QuestionBestSourceIcon(args) { return arr(arguments) }\n" +
72
- "function AnswerVotingButtons(args) { return arr(arguments) }\n" +
73
- "function TruncatePhraseList(args) { return arr(arguments) }\n" +
74
- "function CommentLink(args) { return arr(arguments) }\n" +
75
- "function DateTimeComponent(args) { return arr(arguments) }\n" +
76
- "function AnswerComments(args) { return arr(arguments) }\n" +
77
- "function Comment(args) { return arr(arguments) }\n" +
78
- "function FeedAnswerItem(args) { return arr(arguments) }\n" +
79
- "function HoverMenu(args) { return arr(arguments) }\n" +
80
- "function ExpandableQText(args) { return arr(arguments) }\n" +
81
- "function TruncatedQText(args) { return arr(arguments) }\n" +
82
- "function UseMobileSite(args) { return arr(arguments) }\n" +
83
- "function LoginSignal(args) { return arr(arguments) }\n" +
84
- "function LiveLogin(args) { return arr(arguments) }\n" +
85
- "function PresencePageMonitor(args) { return arr(arguments) }\n" +
86
- "function UserSig(args) { return arr(arguments) }\n" +
87
- "function HeaderLogo(args) { return arr(arguments) }\n" +
88
- "function NavElement(args) { return arr(arguments) }\n" +
89
- "function UserFollowLink(args) { return arr(arguments) }\n" +
90
- "function FlashClient(args) { return arr(arguments) }\n" +
91
- "function AddQuestionLink(args) { return arr(arguments) }\n" +
92
- "function QTextImageEnlarger(args) { return arr(arguments) }\n" +
93
- "function NavigatorResults(args) { return arr(arguments) }\n" +
94
- "function RateAnswerApproveButton(args) { return arr(arguments) }\n" +
95
- "function AnswerVoterNamesExpandable(args) { return arr(arguments) }\n" +
96
- "function PrefetchManager(args) { return arr(arguments) }\n" +
97
- "function ResponseRecorder(args) { return arr(arguments) }\n" +
98
- '';
99
- var tail = "\n_components;\n";
100
-
101
- return head + body + tail;
102
- }
103
-
104
- collect_stdin(function(t) {
105
- // u.puts(t)
106
-
107
- var script = process.binding('evals').Script
108
- var code = script.runInThisContext(prepare4eval(t))
109
- // u.puts(u.inspect(code, false, null))
110
- u.puts(JSON.stringify(get_timestamps(code), null, ' '))
111
- })
data/lib/bwkfanboy/plugins/quora.rb DELETED
@@ -1,81 +0,0 @@
1
- # Quora uses JavaScript to dynamically insert timestamps for the
2
- # questions/comments. To combat this, we cut the JS from the page,
3
- # evaluate it in nodejs and construct a hash with 'article-name =>
4
- # timestamp' pairs.
5
- #
6
- # Requires:
7
- #
8
- # - 'quora.js' script in directory with the plugin;
9
- # - 1 option: a Quora user's name, for example 'Brandon-Smietana'
10
-
11
- require 'nokogiri'
12
-
13
- class Page < Bwkfanboy::Parse
14
- module Meta
15
- URI = 'http://www.quora.com/#{opt[0]}/answers'
16
- URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/quora.html'
17
- ENC = 'UTF-8'
18
- VERSION = 13
19
- COPYRIGHT = "See bwkfanboy's LICENSE file"
20
- TITLE = "Last n answers (per-user) from Quora; requires nodejs 0.3.7+"
21
- CONTENT_TYPE = 'html'
22
- end
23
-
24
- def myparse(stream)
25
- profile = opt[0] # for example, 'Brandon-Smietana'
26
-
27
- # read stdin
28
- doc = Nokogiri::HTML(stream, nil, Meta::ENC)
29
-
30
- # extract & evaluate JavaScript into tstp
31
- tstp = nil
32
- doc.xpath("//script").each {|i|
33
- js = i.text
34
- if js.include?('"epoch_us"')
35
- if Bwkfanboy::Utils.cfg[:verbose] >= 3
36
- File.open("#{File.basename(__FILE__)}-epoch.js.raw", "w+") {|i| i.puts js }
37
- end
38
- # open a pipe, write js to it & read a JSON result
39
- r = ''
40
- begin
41
- pipe = IO.popen("#{File.dirname(__FILE__)}/quora.js", 'w+')
42
- pipe.puts js
43
- pipe.close_write
44
- while line = pipe.gets
45
- r << line
46
- end
47
- pipe.close
48
- rescue
49
- fail "evaluation in nodejs failed: #{$!}"
50
- end
51
- # r = Bwkfanboy::Utils.cmd_run("echo '#{js}' | #{File.dirname(__FILE__)}/quora.js")
52
- # fail "evaluation in nodejs failed: #{r[1]}" if r[0] != 0
53
- tstp = JSON.parse r
54
- break
55
- end
56
- }
57
-
58
- # xpath movements
59
- url = 'http://www.quora.com'
60
- a = clean(doc.xpath("//h1").text())
61
-
62
- doc.xpath("//div[@class='feed_item_question']").each {|i|
63
- t = clean(i.xpath("h2").text())
64
-
65
- l = clean(i.xpath("h2//a")[0].attributes['href'].value())
66
- next unless tstp.key?(l) # ignore answers without timestamps
67
- u = date(Time.at(tstp[l]/1000/1000).to_s)
68
- # u = DateTime.new.iso8601
69
- l = url + l + '/answer/' + profile
70
-
71
- c = i.xpath("../div[@class='hidden expanded_q_text']/div").inner_html(encoding: Meta::ENC)
72
- if c == ''
73
- c = i.xpath("../../div/div/div/div[3]")
74
- c.xpath("div").each {|j| j.remove() }
75
- c = c.inner_html(encoding: Meta::ENC)
76
- end
77
-
78
- self << { title: t, link: l, updated: u, author: a, content: c }
79
- }
80
- end
81
- end