bwkfanboy 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/test/semis/links.txt CHANGED
@@ -1,4 +1,4 @@
1
1
  # output_file URL
2
- quora.html http://www.quora.com/Brandon-Smietana/answers
2
+ inc.html http://www.inc.com/author/jason-fried
3
3
  bwk.html http://www.dailyprincetonian.com/advanced_search/?author=Brian+Kernighan
4
4
  econlib.html http://www.econlib.org/cgi-bin/searcharticles.pl?sortby=DD&query=ha*
data/test/test_server.rb CHANGED
@@ -25,19 +25,20 @@ class TestServer < MiniTest::Unit::TestCase
25
25
  assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}") }
26
26
  assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=INVALID") }
27
27
  # 'o' is missing
28
- assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=quora") }
28
+ assert_raises(OpenURI::HTTPError) { open("http://#{ADDR}:#{@port}/?p=inc") }
29
29
  end
30
30
 
31
- def test_right_plugin
32
- r = ''
33
- open("http://#{ADDR}:#{@port}/?p=bwk") { |f| r = f.read }
34
- # wget -q -O - 127.0.0.1:9042/\?p=bwk | md5
35
- assert_equal('64186fac2c52e5a969ad5675b9cc95ed', Digest::MD5.hexdigest(r))
36
-
37
- r = ''
38
- open("http://#{ADDR}:#{@port}/?p=quora&o=foo") { |f| r = f.read }
39
- # bin/bwkfanboy_server -Dd
40
- # wget -q -O - '127.0.0.1:9042/\?p=quora&o=foo' | md5
41
- assert_equal('0f3f6607768392d69d15621eee815ab3', Digest::MD5.hexdigest(r))
31
+ def test_right_plugins
32
+ plugins = {
33
+ 'bwk' => '64186fac2c52e5a969ad5675b9cc95ed',
34
+ 'econlib' => '11f6114a9ab54d6ec67a26cbd76f5260',
35
+ 'inc' => '13dae248c81dd6407ff327dd5575f8b5',
36
+ }
37
+ plugins.each {|k,v|
38
+ r = ''
39
+ open("http://#{ADDR}:#{@port}/?p=#{k}&o=foo") { |f| r = f.read }
40
+ # wget -q -O - '127.0.0.1:9042/?p=inc&o=foo' | md5
41
+ assert_equal(v, Digest::MD5.hexdigest(r))
42
+ }
42
43
  end
43
44
  end
metadata CHANGED
@@ -1,108 +1,82 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bwkfanboy
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 1
7
- - 3
8
- - 2
9
- version: 1.3.2
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.4.0
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Alexander Gromnitsky
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-03-28 00:00:00 +03:00
18
- default_executable: bwkfanboy
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2011-04-06 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: open4
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &338037960 !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 1
30
- - 0
31
- - 1
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
32
21
  version: 1.0.1
33
22
  type: :runtime
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: activesupport
37
23
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *338037960
25
+ - !ruby/object:Gem::Dependency
26
+ name: activesupport
27
+ requirement: &338035630 !ruby/object:Gem::Requirement
39
28
  none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- segments:
44
- - 3
45
- - 0
46
- - 5
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
47
32
  version: 3.0.5
48
33
  type: :runtime
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: nokogiri
52
34
  prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *338035630
36
+ - !ruby/object:Gem::Dependency
37
+ name: nokogiri
38
+ requirement: &338034330 !ruby/object:Gem::Requirement
54
39
  none: false
55
- requirements:
56
- - - ">="
57
- - !ruby/object:Gem::Version
58
- segments:
59
- - 1
60
- - 4
61
- - 4
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
62
43
  version: 1.4.4
63
44
  type: :runtime
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
66
- name: jsonschema
67
45
  prerelease: false
68
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *338034330
47
+ - !ruby/object:Gem::Dependency
48
+ name: jsonschema
49
+ requirement: &338032480 !ruby/object:Gem::Requirement
69
50
  none: false
70
- requirements:
71
- - - ">="
72
- - !ruby/object:Gem::Version
73
- segments:
74
- - 2
75
- - 0
76
- - 1
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
77
54
  version: 2.0.1
78
55
  type: :runtime
79
- version_requirements: *id004
80
- - !ruby/object:Gem::Dependency
81
- name: git
82
56
  prerelease: false
83
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *338032480
58
+ - !ruby/object:Gem::Dependency
59
+ name: git
60
+ requirement: &338031220 !ruby/object:Gem::Requirement
84
61
  none: false
85
- requirements:
86
- - - ">="
87
- - !ruby/object:Gem::Version
88
- segments:
89
- - 1
90
- - 2
91
- - 5
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
92
65
  version: 1.2.5
93
66
  type: :development
94
- version_requirements: *id005
95
- description: A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.
67
+ prerelease: false
68
+ version_requirements: *338031220
69
+ description: bwkfanboy is a converter from a raw HTML to an Atom feed. You can use
70
+ it to watch sites that do not provide its own feed.
96
71
  email: alexander.gromnitsky@gmail.com
97
- executables:
72
+ executables:
98
73
  - bwkfanboy_generate
99
74
  - bwkfanboy_parse
100
75
  - bwkfanboy
101
76
  - bwkfanboy_server
102
77
  - bwkfanboy_fetch
103
78
  extensions: []
104
-
105
- extra_rdoc_files:
79
+ extra_rdoc_files:
106
80
  - doc/NEWS.rdoc
107
81
  - doc/plugin.rdoc
108
82
  - doc/bwkfanboy_fetch.rdoc
@@ -110,7 +84,7 @@ extra_rdoc_files:
110
84
  - doc/bwkfanboy_parse.rdoc
111
85
  - doc/bwkfanboy_server.rdoc
112
86
  - doc/README.rdoc
113
- files:
87
+ files:
114
88
  - README.rdoc
115
89
  - Rakefile
116
90
  - bin/bwkfanboy
@@ -137,8 +111,7 @@ files:
137
111
  - lib/bwkfanboy/plugins/bwk.rb
138
112
  - lib/bwkfanboy/plugins/econlib.rb
139
113
  - lib/bwkfanboy/plugins/freebsd-ports-update.rb
140
- - lib/bwkfanboy/plugins/quora.js
141
- - lib/bwkfanboy/plugins/quora.rb
114
+ - lib/bwkfanboy/plugins/inc.rb
142
115
  - lib/bwkfanboy/schema.js
143
116
  - lib/bwkfanboy/utils.rb
144
117
  - test/helper.rb
@@ -149,52 +122,44 @@ files:
149
122
  - test/semis/bwk.html
150
123
  - test/semis/bwk.json
151
124
  - test/semis/econlib.html
125
+ - test/semis/inc.html
152
126
  - test/semis/links.txt
153
- - test/semis/quora.html
154
127
  - test/test_fetch.rb
155
128
  - test/test_generate.rb
156
129
  - test/test_parse.rb
157
130
  - test/test_server.rb
158
131
  - test/xml-clean.sh
159
132
  - web/bwkfanboy.cgi
160
- has_rdoc: true
161
133
  homepage: http://github.com/gromnitsky/bwkfanboy
162
134
  licenses: []
163
-
164
135
  post_install_message:
165
- rdoc_options:
136
+ rdoc_options:
166
137
  - -m
167
138
  - doc/README.rdoc
168
139
  - -x
169
140
  - plugins
170
- require_paths:
141
+ require_paths:
171
142
  - lib
172
- required_ruby_version: !ruby/object:Gem::Requirement
143
+ required_ruby_version: !ruby/object:Gem::Requirement
173
144
  none: false
174
- requirements:
175
- - - ">="
176
- - !ruby/object:Gem::Version
177
- segments:
178
- - 1
179
- - 9
180
- - 2
145
+ requirements:
146
+ - - ! '>='
147
+ - !ruby/object:Gem::Version
181
148
  version: 1.9.2
182
- required_rubygems_version: !ruby/object:Gem::Requirement
149
+ required_rubygems_version: !ruby/object:Gem::Requirement
183
150
  none: false
184
- requirements:
185
- - - ">="
186
- - !ruby/object:Gem::Version
187
- segments:
188
- - 0
189
- version: "0"
151
+ requirements:
152
+ - - ! '>='
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
190
155
  requirements: []
191
-
192
156
  rubyforge_project:
193
- rubygems_version: 1.3.7
157
+ rubygems_version: 1.7.2
194
158
  signing_key:
195
159
  specification_version: 3
196
- summary: A converter from HTML to Atom feed that you can use to watch sites that do not provide its own feed.
197
- test_files:
160
+ summary: bwkfanboy is a converter from a raw HTML to an Atom feed. You can use it
161
+ to watch sites that do not provide its own feed.
162
+ test_files:
198
163
  - test/test_fetch.rb
199
164
  - test/test_parse.rb
200
165
  - test/test_server.rb
@@ -1,111 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- /*
4
- A companion to 'quora.rb' plugin.
5
- */
6
-
7
- var u = require('util')
8
-
9
- function get_timestamps(data) {
10
- var e = {}
11
- var link = null
12
- for (i in data) {
13
- var f = data[i][0]
14
-
15
- if (f == 'FeedStoryItem') {
16
- link = data[i][3]['q_path']
17
- // u.puts(link)
18
- }
19
-
20
- if (f == 'DateTimeComponent') {
21
- e[link] = data[i][3]['epoch_us']
22
- }
23
- }
24
- return e
25
- }
26
-
27
- function collect_stdin(callback) {
28
- var input = '';
29
- process.stdin.resume();
30
- process.stdin.setEncoding('ascii');
31
- process.stdin.on('data', function (chunk) {
32
- input += chunk
33
- });
34
- process.stdin.on('end', function () {
35
- callback(input);
36
- });
37
- };
38
-
39
- function prepare4eval(body) {
40
- var head = "function W2() {}\n" +
41
- "W2.addComponentMetadata = function(foo) {}\n" +
42
- "W2.registerComponents = function(foo) {}\n" +
43
- "W2._ConnectionWarningCls = function(args) {}\n" +
44
- "W2._LoadingCls = function(args) {}\n" +
45
- "W2._InteractionModeCls = function(args) {}\n" +
46
- "W2.onLoad = function(args) {}\n" +
47
- "\n" +
48
- "document = 'foo'\n" +
49
- "$ = function(foo) { return $ }\n" +
50
- "$.ready = function(foo) {}\n" +
51
- "\n" +
52
- "arr = function(args) {\n" +
53
- " a = []\n" +
54
- " a.push(args.callee.name)\n" +
55
- " for(var i = 0; i < args.length; i++) { a.push(args[i]) }\n" +
56
- " return a\n" +
57
- "}\n" +
58
- "\n" +
59
- "function LoginButton(args) { return arr(arguments) }\n" +
60
- "function ContextNavigator(args) { return arr(arguments) }\n" +
61
- "function TypeaheadContextText(args) { return arr(arguments) }\n" +
62
- "function TypeaheadResults(args) { return arr(arguments) }\n" +
63
- "function QuestionAddLink(args) { return arr(arguments) }\n" +
64
- "function TitleNotificationsCount(args) { return arr(arguments) }\n" +
65
- "function TextareaAutoSize(args) { return arr(arguments) }\n" +
66
- "function PMsgContainer(args) { return arr(arguments) }\n" +
67
- "function UserAdminMenuLink(args) { return arr(arguments) }\n" +
68
- "function PagedList(args) { return arr(arguments) }\n" +
69
- "function FeedStoryItem(args) { return arr(arguments) }\n" +
70
- "function QuestionLink(args) { return arr(arguments) }\n" +
71
- "function QuestionBestSourceIcon(args) { return arr(arguments) }\n" +
72
- "function AnswerVotingButtons(args) { return arr(arguments) }\n" +
73
- "function TruncatePhraseList(args) { return arr(arguments) }\n" +
74
- "function CommentLink(args) { return arr(arguments) }\n" +
75
- "function DateTimeComponent(args) { return arr(arguments) }\n" +
76
- "function AnswerComments(args) { return arr(arguments) }\n" +
77
- "function Comment(args) { return arr(arguments) }\n" +
78
- "function FeedAnswerItem(args) { return arr(arguments) }\n" +
79
- "function HoverMenu(args) { return arr(arguments) }\n" +
80
- "function ExpandableQText(args) { return arr(arguments) }\n" +
81
- "function TruncatedQText(args) { return arr(arguments) }\n" +
82
- "function UseMobileSite(args) { return arr(arguments) }\n" +
83
- "function LoginSignal(args) { return arr(arguments) }\n" +
84
- "function LiveLogin(args) { return arr(arguments) }\n" +
85
- "function PresencePageMonitor(args) { return arr(arguments) }\n" +
86
- "function UserSig(args) { return arr(arguments) }\n" +
87
- "function HeaderLogo(args) { return arr(arguments) }\n" +
88
- "function NavElement(args) { return arr(arguments) }\n" +
89
- "function UserFollowLink(args) { return arr(arguments) }\n" +
90
- "function FlashClient(args) { return arr(arguments) }\n" +
91
- "function AddQuestionLink(args) { return arr(arguments) }\n" +
92
- "function QTextImageEnlarger(args) { return arr(arguments) }\n" +
93
- "function NavigatorResults(args) { return arr(arguments) }\n" +
94
- "function RateAnswerApproveButton(args) { return arr(arguments) }\n" +
95
- "function AnswerVoterNamesExpandable(args) { return arr(arguments) }\n" +
96
- "function PrefetchManager(args) { return arr(arguments) }\n" +
97
- "function ResponseRecorder(args) { return arr(arguments) }\n" +
98
- '';
99
- var tail = "\n_components;\n";
100
-
101
- return head + body + tail;
102
- }
103
-
104
- collect_stdin(function(t) {
105
- // u.puts(t)
106
-
107
- var script = process.binding('evals').Script
108
- var code = script.runInThisContext(prepare4eval(t))
109
- // u.puts(u.inspect(code, false, null))
110
- u.puts(JSON.stringify(get_timestamps(code), null, ' '))
111
- })
@@ -1,81 +0,0 @@
1
- # Quora uses JavaScript to dynamically insert timestamps for the
2
- # questions/comments. To combat this, we cut the JS from the page,
3
- # evaluate it in nodejs and construct a hash with 'article-name =>
4
- # timestamp' pairs.
5
- #
6
- # Requires:
7
- #
8
- # - 'quora.js' script in directory with the plugin;
9
- # - 1 option: a Quora user's name, for example 'Brandon-Smietana'
10
-
11
- require 'nokogiri'
12
-
13
- class Page < Bwkfanboy::Parse
14
- module Meta
15
- URI = 'http://www.quora.com/#{opt[0]}/answers'
16
- URI_DEBUG = '/home/alex/lib/software/alex/bwkfanboy/test/semis/quora.html'
17
- ENC = 'UTF-8'
18
- VERSION = 13
19
- COPYRIGHT = "See bwkfanboy's LICENSE file"
20
- TITLE = "Last n answers (per-user) from Quora; requires nodejs 0.3.7+"
21
- CONTENT_TYPE = 'html'
22
- end
23
-
24
- def myparse(stream)
25
- profile = opt[0] # for example, 'Brandon-Smietana'
26
-
27
- # read stdin
28
- doc = Nokogiri::HTML(stream, nil, Meta::ENC)
29
-
30
- # extract & evaluate JavaScript into tstp
31
- tstp = nil
32
- doc.xpath("//script").each {|i|
33
- js = i.text
34
- if js.include?('"epoch_us"')
35
- if Bwkfanboy::Utils.cfg[:verbose] >= 3
36
- File.open("#{File.basename(__FILE__)}-epoch.js.raw", "w+") {|i| i.puts js }
37
- end
38
- # open a pipe, write js to it & read a JSON result
39
- r = ''
40
- begin
41
- pipe = IO.popen("#{File.dirname(__FILE__)}/quora.js", 'w+')
42
- pipe.puts js
43
- pipe.close_write
44
- while line = pipe.gets
45
- r << line
46
- end
47
- pipe.close
48
- rescue
49
- fail "evaluation in nodejs failed: #{$!}"
50
- end
51
- # r = Bwkfanboy::Utils.cmd_run("echo '#{js}' | #{File.dirname(__FILE__)}/quora.js")
52
- # fail "evaluation in nodejs failed: #{r[1]}" if r[0] != 0
53
- tstp = JSON.parse r
54
- break
55
- end
56
- }
57
-
58
- # xpath movements
59
- url = 'http://www.quora.com'
60
- a = clean(doc.xpath("//h1").text())
61
-
62
- doc.xpath("//div[@class='feed_item_question']").each {|i|
63
- t = clean(i.xpath("h2").text())
64
-
65
- l = clean(i.xpath("h2//a")[0].attributes['href'].value())
66
- next unless tstp.key?(l) # ignore answers without timestamps
67
- u = date(Time.at(tstp[l]/1000/1000).to_s)
68
- # u = DateTime.new.iso8601
69
- l = url + l + '/answer/' + profile
70
-
71
- c = i.xpath("../div[@class='hidden expanded_q_text']/div").inner_html(encoding: Meta::ENC)
72
- if c == ''
73
- c = i.xpath("../../div/div/div/div[3]")
74
- c.xpath("div").each {|j| j.remove() }
75
- c = c.inner_html(encoding: Meta::ENC)
76
- end
77
-
78
- self << { title: t, link: l, updated: u, author: a, content: c }
79
- }
80
- end
81
- end