wp 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ab7e33f36703b9dad5ab221b6232041779eca3a5
4
+ data.tar.gz: b5475589d63447669a7319aba5923519d80a1a16
5
+ SHA512:
6
+ metadata.gz: 6f9614e78691b8c6a0d09ef2247d18e42bbb09e63855e49cdeb0803870071814dde4512bd0c9f21d0ba18b62eadfd74b4346f767e36dc54f92454c0f12c7b59a
7
+ data.tar.gz: fceac62168a01353bd64daa27055a24c9e5350c7266e853f5289bf643f3a8bedd7ca2302fbb7d94411604132b718eba9df4050c2dc053bcd703f6aeba3ba2e83
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Chris Gahan
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
1
+ Writeme!
data/bin/wp ADDED
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'cgi'
4
+
5
+ ###########################################################
6
+ # TODOs:
7
+ # [ ] Handle redirects (in Wiki#article)
8
+ # [ ] Render templates
9
+ # [ ] Remove "edit" links
10
+ # [ ] Don't break on pages with & in them
11
+ # [x] handle redirects
12
+ # [ ] store redirects in leveldb
13
+ # (update leveldb as the redirects are discovered)
14
+ # [ ] Random titles (lookup 30 random letter pairs)
15
+
16
+ ###########################################################
17
+
18
# Shows +html+ in the terminal by writing it to the stdin of a `w3m` pager
# process. (An alternative pager invocation would be "lynx -stdin".)
#
# html - the HTML document to display, as a String.
def console_viewer(html)
  IO.popen("w3m -T text/html", "w") { |pager| pager.puts html }
end
24
+
25
+ ###########################################################
26
+
27
+ require 'slop' # lazy loaded
28
+
29
# Command-line option definitions. Flags ending in "=" take an argument.
opts = Slop.parse(help: true, strict: true) {
  banner('Usage: wp [options] <query...>')

  on('s',  'server',   'Launch server')
  on('p=', 'port',     'Webserver port (default: 4567)')
  on('d',  'download', 'Download the latest dump')
  # on('u', 'update', 'Update the database (takes a LONG time)')
  on('i=', 'import',   'Import dump')
  on('o=', 'outfile',  'Where to output the file')
  on('c',  'config',   'Interactive config')

  # Unused sketch of a "server" subcommand:
  #
  #   command 'server' do
  #     on "p", "port", 'Port (default: 3000)', default: 3000
  #
  #     run do |opts, args|
  #       server(opts.port)
  #     end
  #   end
}
48
+
49
+ ###########################################################
50
+
51
+ require 'wp/wiki'
52
+
53
# Entry point: either start the web server, or search the local index for the
# query given on the command line and page the chosen article to the console.
if opts.server?
  # Run Webserver
  require 'wp/webserver'

elsif opts.reindex?
  # Not implemented.

elsif opts.update?
  # Not implemented.

else
  # CLI interface
  query  = ARGV.join(" ")
  wiki   = Wiki.new
  titles = wiki.search(query, 20)

  # Nothing to choose from: say so instead of showing an empty menu.
  if titles.empty?
    warn "No titles found for: #{query}"
    exit 1
  end

  if titles.size == 1
    title = titles.first
  else
    titles.each.with_index do |candidate, i|
      puts "#{i+1}. #{candidate}"
    end

    title = nil
    until title
      print "> "
      line = STDIN.gets

      # EOF (Ctrl-D / closed stdin) or a blank line cancels the selection.
      exit 1 if line.nil? || line.strip.empty?

      next unless line.strip =~ /\A(\d+)\z/

      # Only accept numbers that were actually listed; "0" used to wrap
      # around to the last entry and too-large numbers produced nil.
      n = $1.to_i
      title = titles[n - 1] if n.between?(1, titles.size)
    end
  end

  a = wiki.article(title)

  # The lookup can come back without an article (title missing from the
  # index or from the decompressed stream); bail out rather than crash.
  unless a.respond_to?(:redirected_from)
    warn "No article found: #{title}"
    exit 1
  end

  # Titles are plain text and may contain "&", "<", etc.
  heading = CGI.escapeHTML(a.title.to_s)

  html = []
  html << "<title>#{heading}</title>"
  html << "<center><h1>#{heading}</h1></center>"
  html << "<center>(redirected from: <b>#{CGI.escapeHTML(a.redirected_from.to_s)}</b>)</center>" if a.redirected_from
  html << a.html

  console_viewer html.join("\n")
  # ws = Thread.new { require 'wp/webserver' }
  # system("luakit", "http://localhost:4567/#{CGI.escape title}")
end
106
+
@@ -0,0 +1,28 @@
1
+
2
+ require 'wp/wiki'
3
+
4
+ #####################################################################
5
+
6
# Formats a number with thousands separators, e.g. 1234567 => "1,234,567".
#
# thing - anything whose #to_s is a number (Integer, Float, numeric String).
#
# Only the integer part is grouped; a leading sign and any fractional part
# are left untouched. Uses core Ruby only (no String#each_slice extension).
#
# Returns a String.
def commatize(thing)
  whole, fraction = thing.to_s.split(".", 2)

  # Insert a comma after every digit that is followed by a multiple of three
  # digits up to the end of the integer part.
  grouped = whole.to_s.gsub(/(\d)(?=(?:\d{3})+\z)/, '\1,')

  fraction ? "#{grouped}.#{fraction}" : grouped
end
9
+
10
+ #####################################################################
11
+
12
# Holder for a lazily created Mechanize agent.
class Downloader

  # Returns the Mechanize agent, creating it on first use. The library is
  # required here so it is only loaded when this method is called.
  def mech
    require 'mechanize'
    return @mech if @mech

    @mech = Mechanize.new
  end

end
20
+
21
+ #####################################################################
22
+
23
# Shows +html+ in the terminal by writing it to the stdin of a `lynx` process.
# (An alternative pager invocation would be "w3m -T text/html".)
#
# html - the HTML document to display, as a String.
def console_viewer(html)
  IO.popen("lynx -stdin", "w") { |pager| pager.puts html }
end
@@ -0,0 +1,79 @@
1
require 'epitools'
require 'fileutils'
require 'yaml'

module Pedia
  # User configuration, loaded from a YAML file under CONFIG_DIR.
  class Config

    # Constants #########################################################

    CONFIG_DIR  = File.expand_path "~/.config/pedia"
    CACHE_DIR   = File.expand_path "~/.cache/pedia"
    CONFIG_FILE = File.join CONFIG_DIR, "config.yml"

    # DSL ###############################################################

    # One declared setting: its name, current value, default, the prompt to
    # show the user, and a type tag (e.g. :dir).
    class Option < TypedStruct["name value default prompt type:symbol"]

      # The configured value, falling back to the default.
      # NOTE(review): relies on TypedStruct keeping fields in @ivars -- confirm.
      def value
        @value ||= @default
      end

      # Prepares whatever the option refers to; for :dir options the
      # directory (and any missing parents) is created.
      def init
        case type
        when :dir
          FileUtils.mkdir_p value
        end
      end

    end


    # Declares a setting: registers it in the spec table and defines an
    # accessor named after it.
    def self.option(*args)
      opt = Option.new(*args)

      attr_accessor opt.name

      @@spec ||= {}
      @@spec[opt.name] = opt
    end

    # Options ###########################################################

    option name: "db_path",
           prompt: "Where should the database be stored (approx. 8 gigs)?",
           default: CACHE_DIR,
           type: :dir

    option name: "xml_path",
           prompt: "Where should the XML be stored (approx. 8 gigs)?",
           default: CACHE_DIR,
           type: :dir


    # Methods ###########################################################

    # Loads the settings file.
    #
    # filename - name of the YAML file; relative names are resolved against
    #            CONFIG_DIR, so the default is CONFIG_FILE.
    #
    # A missing, empty or non-mapping file yields no options instead of an
    # error. Keys that were not declared with `option` are ignored.
    def initialize(filename="config.yml")
      FileUtils.mkdir_p CONFIG_DIR

      path = File.expand_path(filename, CONFIG_DIR)
      yaml = File.exist?(path) ? YAML.load_file(path) : nil
      yaml = {} unless yaml.is_a?(Hash)

      @options = {}

      yaml.each do |key, val|
        if opt = @@spec[key]
          @options[key] = opt.with(value: val)
        end
      end
    end


    # Hash of option name => Option for every declared key found in the file.
    def options
      @options
    end

  end # class Config
end # module Pedia
@@ -0,0 +1,33 @@
1
+ require 'leveldb'
2
+
3
+ # https://github.com/nricciar/wikicloth
4
+ require 'wikicloth'
5
+
6
# Earlier key/value-backed article store.
#
# NOTE(review): nothing visible here assigns @db (the constructor is empty),
# so the methods below only work once @db has been set some other way.
class Wiki

  # path - accepted but not used by the visible code.
  def initialize(path="enwiki")
  end

  # Fetches the record stored under +key+ and returns it zlib-inflated.
  def [](key)
    compressed = @db.get(key)
    Zlib.inflate(compressed)
  end

  # Up to +max+ keys, i.e. a search with an empty prefix.
  def titles(max=30)
    search("", max)
  end

  # Asks the store for up to +max+ keys matching +prefix+.
  def search(prefix, max=30)
    @db.fwmkeys(prefix, max)
  end

  # Renders the wiki markup stored under +title+ as HTML via WikiCloth.
  def html(title)
    markup = self[title]
    WikiCloth::Parser.new(data: markup).to_html
  end

end
28
+
29
+
30
# When this file is run directly, dump the first batch of titles.
if __FILE__ == $0
  pp Wiki.new.titles
end
@@ -0,0 +1,67 @@
1
+ require 'nokogiri'
2
+
3
+ # <title>AccessibleComputing</title>
4
+ # <ns>0</ns>
5
+ # <id>10</id>
6
+ # <redirect title="Computer accessibility" />
7
+ # <revision>
8
+ # <sha1>lo15ponaybcg2sf49sstw9gdjmdetnk</sha1>
9
+ # <format>text/x-wiki</format>
10
+ # <text>...ARTICLE...</text>
11
+ # </revision>
12
+
13
+
14
# Streams <page> records out of a MediaWiki XML dump.
#
# Each record is yielded as a Hash that may contain the keys "title", "text"
# and "redirect", depending on which of those elements the page had.
class XMLReader

  include Enumerable

  # filename - path to the dump. Names ending in ".bz2" are decompressed on
  #            the fly through an external `bunzip2 -c` process.
  def initialize(filename)
    io =
      if filename.end_with?(".bz2")
        IO.popen ["bunzip2", "-c", filename]
      else
        # File.open rather than Kernel#open, so a name starting with "|"
        # is never treated as a command to run.
        File.open(filename)
      end

    @reader = Nokogiri::XML::Reader io
  end

  # Yields one Hash per page, when the page's closing tag is reached.
  # Without a block, returns an Enumerator.
  def each
    return enum_for(:each) unless block_given?

    info = {}

    @reader.each do |node|

      case node.name
      when "page"
        if node.open?
          # Opening <page>: start collecting a fresh record.
          info = {}
        else
          yield info
        end
      when "title", "text"
        info[node.name] = node.text if node.open?
      when "redirect"
        info["redirect"] = node.attribute("title")
      end

    end
  end

end
50
+
51
+
52
+
53
+
54
# Convenience predicates added to Nokogiri's pull-parser cursor.
class Nokogiri::XML::Reader
  # True when the cursor is on an element's start tag.
  def open?
    Nokogiri::XML::Reader::TYPE_ELEMENT == node_type
  end

  # True when the cursor is on an element's end tag.
  def closed?
    Nokogiri::XML::Reader::TYPE_END_ELEMENT == node_type
  end

  # The current node's inner XML.
  def text
    inner_xml
  end
end
67
+
@@ -0,0 +1,115 @@
1
require 'sinatra/base'
require 'wp/wiki'

# sinatra/reloader ships with sinatra-contrib, which the gem declares only as
# a development dependency, so it may legitimately be absent at runtime.
begin
  require "sinatra/reloader" # if development?
rescue LoadError
  # Code reloading is a development convenience; run without it.
end

# Single Wiki instance used by the web server.
$wiki = Wiki.new
6
+
7
# Small web front-end for browsing the local article index.
#
# Everything that comes from the request or from the dump (queries, titles)
# is HTML-escaped before being placed in a page, and titles are
# percent-encoded when used in links.
class Webserver < Sinatra::Base

  # The shared Wiki instance.
  def wiki
    $wiki
  end

  # HTML-escapes +text+ for use in element content or attribute values.
  def h(text)
    Rack::Utils.escape_html(text.to_s)
  end

  # Percent-encodes +title+ for use as a URL path. "/" is kept as-is because
  # the article route captures the whole remaining path.
  def title_path(title)
    title.to_s.split("/", -1).map { |segment|
      URI.encode_www_form_component(segment).gsub("+", "%20")
    }.join("/")
  end

  # Builds a link to the article +title+.
  #
  # desc - optional link body, inserted as-is (it must already be safe HTML);
  #        defaults to the escaped title.
  def link_to(title, desc=nil)
    desc ||= h(title)
    "<a href=\"/#{title_path(title)}\">#{desc}</a>"
  end

  # Returns +title+ as escaped HTML with every literal occurrence of +query+
  # wrapped in a yellow <span>. A nil or empty query highlights nothing.
  def highlight(title, query)
    return h(title) if query.nil? || query.empty?

    marked = "<span style='background: yellow'>#{h query}</span>"

    # Split on the literal query (limit -1 keeps trailing empty pieces), escape
    # the pieces in between, and stitch them back together with the marker.
    title.split(Regexp.new(Regexp.escape(query)), -1).map { |piece| h(piece) }.join(marked)
  end

  # The search form shown at the top of every page, pre-filled with +query+.
  # The action is absolute so it also works from article URLs containing "/".
  def header(query="")
    %{
      <form action="/search">
        <input type="text" name="q" value="#{h query}">
        <input type="submit" value="search">
      </form>
    }
  end

  # Front page: titles listed from a random capital letter onwards.
  get "/" do
    letter = ('A'..'Z').to_a.sample
    titles = wiki.search(letter, 20)

    links = titles.map { |title| "<li>#{link_to title}</li>" }

    %{
      #{header}

      <h1>#{letter}</h1>
      <ul>
        #{links.join "\n"}
      </ul>
    }
  end

  # Search results for ?q=...
  get "/search" do
    query  = params[:q].to_s
    titles = wiki.search query

    links = titles.map do |title|
      desc = highlight title, query
      "<li>#{link_to title, desc}</li>"
    end

    %{
      #{header query}

      <h1>Query: #{h query}</h1>
      <ul>
        #{links.join "\n"}
      </ul>
    }
  end

  # Article page; everything after the first "/" is the title.
  get %r{/(.+)} do
    title = params[:captures].first

    if article = wiki.article(title)
      %{
        #{header article.title}

        <h1>#{h article.title}</h1>

        #{article.html}

        <!--
          #{article.xml.to_s}
        -->
      }
    else
      "No article found."
    end
  end

  # Start serving as soon as the class body has been evaluated.
  run!

end
89
+
90
+
91
+
92
# WikiCloth parser customised with this application's link/template hooks.
class WikiParser < WikiCloth::Parser

  # Internal links currently just pop up an alert naming the page.
  url_for { |page| "javascript:alert('You clicked on: #{page}');" }

  link_attributes_for { |page| { href: url_for(page) } }

  # Only the "hello" template is known; anything else yields nil.
  template { |template| template == "hello" ? "Hello {{{1}}}" : nil }

  # External links open in a new window; the URL doubles as the label when
  # no text was given.
  external_link do |url, text|
    label = text.blank? ? url : text
    "<a href=\"#{url}\" target=\"_blank\" class=\"exlink\">#{label}</a>"
  end

end
111
+
112
+ # @wiki = WikiParser.new({
113
+ # :params => { "PAGENAME" => "Testing123" },
114
+ # :data => "{{hello|world}} From {{ PAGENAME }} -- [www.google.com]";
115
+ # })
@@ -0,0 +1,189 @@
1
+ require 'leveldb'
2
+ require 'snappy'
3
+ require 'nokogiri'
4
+ require 'wikicloth'
5
+
6
+ #####################################################################
7
+
8
# Decompresses (part of) a bzip2 file through an external `bunzip2 -c`
# process and yields the decompressed stream.
#
# thing  - what to open for reading (passed to `open` in binary mode).
# offset - byte position in the compressed file to start at (nil: start).
# length - number of compressed bytes to feed (nil: up to end of file).
#
# Yields an IO to read decompressed data from.
# Returns whatever the block returns.
#
# The compressed bytes are pumped into bunzip2 by a background thread. That
# thread is always stopped and joined before returning, so leaving the block
# early (break/return/exception) does not leave it writing to a closed pipe.
def bz2_stream(thing, offset=nil, length=nil)
  require 'open3'

  open(thing, "rb") do |bz2|

    Open3.popen2("bunzip2", "-c") do |inp, outp, _wait_thr|
      feeder = Thread.new do
        begin
          IO.copy_stream(bz2, inp, length, offset)
        rescue Errno::EPIPE, IOError
          # bunzip2 went away or the pipe was closed: the reader is done.
        ensure
          # Closing stdin is what tells bunzip2 there is no more input.
          inp.close unless inp.closed?
        end
      end

      begin
        yield outp
      ensure
        feeder.kill
        feeder.join
      end
    end

  end

end
25
+
26
+ #####################################################################
27
+
28
+ class Article < Struct.new(:title, :text, :redirect, :xml, :redirected_from)
29
+
30
+ def initialize(page)
31
+ self.xml = page
32
+ self.title = page.at("title").text
33
+
34
+ if redirect = page.at("redirect")
35
+ self.redirect = redirect["title"]
36
+ else
37
+ self.text = page.at("text").text
38
+ end
39
+ end
40
+
41
+
42
+ def redirect?
43
+ !!redirect
44
+ end
45
+
46
+
47
+ def to_html
48
+ # WikiCloth::Parser.new(data: text).to_html
49
+ WikiParser.new(data: text).to_html
50
+ end
51
+
52
+ alias_method :html, :to_html
53
+
54
+ end
55
+
56
+ #####################################################################
57
+
58
# Enumerates the <page> elements of an XML snippet as Article objects.
class XMLReader

  include Enumerable

  # input - XML text (or IO) containing zero or more <page> elements; it is
  #         parsed as a fragment, so no single root element is required.
  def initialize(input)
    @doc = Nokogiri::XML.fragment(input)
  end


  # Yields an Article for each <page> found.
  def each
    pages = @doc.search("page")
    pages.each { |node| yield Article.new(node) }
  end

end
74
+
75
+ #####################################################################
76
+
77
# Article lookup on top of a multistream bzip2 dump and a LevelDB index.
#
# Index layout (as written by #import_index):
#   "title/<title>"   => byte offset of the bz2 stream holding that page
#   "length/<offset>" => compressed length of the stream starting at <offset>
class Wiki
  #@@index_url = "http://dumps.wikimedia.org/enwiki/20130604/enwiki-20130604-pages-articles-multistream-index.txt.bz2"
  @@index_url    = "/d/wiki/enwiki-20130604-pages-articles-multistream-index.txt.bz2"
  @@articles_url = "/d/wiki/enwiki-20130604-pages-articles-multistream.xml.bz2"

  # Upper bound on redirect hops followed by #article, so circular
  # redirects terminate instead of recursing forever.
  MAX_REDIRECTS = 10

  def initialize
  end


  # The index database, opened on first use. The parent directory is created
  # if needed so a fresh install can open it.
  def db
    @db ||= begin
      require 'fileutils'

      path = File.expand_path("~/.cache/wp/enwiki-index")
      FileUtils.mkdir_p File.dirname(path)
      LevelDB::DB.new path
    end
  end


  # Returns up to +amount+ titles, in index order, starting at the first
  # title that sorts at or after +title+.
  def search(title, amount=40)
    db.each(from: "title/#{title}").take(amount).map do |key, val|
      key.split("/", 2).last
    end
  end

  # Returns up to +amount+ pseudo-random titles: for each one, a random
  # two-character prefix is chosen and the first title at or after it is
  # taken. If nothing sorts after the prefix, the first title in the index is
  # used; an index without titles yields an empty list.
  def random(amount=30)
    letters = [*'A'..'Z'] + [*'0'..'9']

    picks = (1..amount).map do
      prefix = "title/" + letters.sample + letters.sample.downcase

      key, _val = db.each(from: prefix).first
      key, _val = db.each(from: "title/").first unless key && key.start_with?("title/")

      key.split("/", 2).last if key && key.start_with?("title/")
    end

    picks.compact
  end

  # Looks up an article by title, following redirects.
  #
  # title           - the article title, with or without a "title/" prefix.
  # redirected_from - title of the redirect page that led here, if any; it is
  #                   stored on the returned article.
  # hops            - number of redirects followed so far (internal).
  #
  # Returns the Article, or nil when the title is not in the index, is not
  # in the indexed stream, or the redirect chain exceeds MAX_REDIRECTS.
  def article(title, redirected_from=nil, hops=0)
    return nil if hops > MAX_REDIRECTS

    if title =~ %r{^title/(.+)}
      title = $1
    end

    offset = db["title/#{title}"]
    return nil unless offset

    offset = offset.to_i

    # The index holds no length for the final stream; nil means "read to the
    # end of the file" rather than "read zero bytes".
    stored_length = db["length/#{offset}"]
    length = stored_length && stored_length.to_i

    page = bz2_stream(@@articles_url, offset, length) do |io|
      XMLReader.new(io.read).find { |candidate| candidate.title == title }
    end

    return nil unless page

    if page.redirect?
      article(page.redirect, title, hops + 1)
    else
      page.redirected_from = redirected_from
      page
    end
  end

  # Rebuilds the LevelDB index from a multistream index file, whose lines
  # have the form "<offset>:<n>:<title>".
  #
  # index_url - location of the bz2-compressed index file.
  def import_index(index_url=@@index_url)
    bz2_stream(index_url) do |io|

      last_offset = nil

      io.each_line.with_index do |line, i|
        line.chomp!

        # Limit of 3 keeps colons that are part of the title.
        offset, _n, title = line.split(":", 3)
        next unless title

        db["title/#{title}"] = offset

        offset = offset.to_i

        last_offset = offset if last_offset.nil?

        # A new offset means the previous stream ended here.
        if last_offset != offset
          length = offset - last_offset
          db["length/#{last_offset}"] = length.to_s

          last_offset = offset
        end

        if i % 11337 == 0
          # commatize lives in another file; fall back to the bare number
          # when it has not been loaded.
          count = respond_to?(:commatize, true) ? commatize(i) : i
          print "\e[1G#{count} - #{title}\e[J"
        end

      end

    end
  end


end
166
+
167
+ #####################################################################
168
+
169
# WikiCloth parser configured with the hooks used when rendering articles.
class WikiParser < WikiCloth::Parser

  # Placeholder target for internal links: an alert that names the page.
  url_for { |page| "javascript:alert('You clicked on: #{page}');" }

  link_attributes_for { |page| { href: url_for(page) } }

  # "hello" is the only template handled; others produce nil.
  template { |template| template == "hello" ? "Hello {{{1}}}" : nil }

  # External links open in a new window and fall back to the URL as label.
  external_link do |url, text|
    label = text.blank? ? url : text
    "<a href=\"#{url}\" target=\"_blank\" class=\"exlink\">#{label}</a>"
  end

end
188
+
189
+ #####################################################################
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - epitron
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-07-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: slop
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: leveldb-ruby
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: snappy
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: sinatra
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: wikicloth
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: sinatra-contrib
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Imports Wikipedia dumps to a local database and lets you browse them
98
+ from the commandline or with a local sinatra server.
99
+ email: chris@ill-logic.com
100
+ executables:
101
+ - wp
102
+ extensions: []
103
+ extra_rdoc_files:
104
+ - README.md
105
+ - LICENSE
106
+ files:
107
+ - bin/wp
108
+ - lib/wp.rb
109
+ - lib/wp/wiki.rb
110
+ - lib/wp/webserver.rb
111
+ - lib/wp/config.rb
112
+ - lib/wp/old/wiki.rb
113
+ - lib/wp/old/xmlreader.rb
114
+ - README.md
115
+ - LICENSE
116
+ homepage: http://github.com/epitron/wp/
117
+ licenses:
118
+ - WTFPL
119
+ metadata: {}
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - '>='
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 2.0.3
137
+ signing_key:
138
+ specification_version: 4
139
+ summary: A local Wikipedia with commandline and web interfaces.
140
+ test_files: []
141
+ has_rdoc: