wp 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +20 -0
- data/README.md +1 -0
- data/bin/wp +106 -0
- data/lib/wp.rb +28 -0
- data/lib/wp/config.rb +79 -0
- data/lib/wp/old/wiki.rb +33 -0
- data/lib/wp/old/xmlreader.rb +67 -0
- data/lib/wp/webserver.rb +115 -0
- data/lib/wp/wiki.rb +189 -0
- metadata +141 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ab7e33f36703b9dad5ab221b6232041779eca3a5
|
4
|
+
data.tar.gz: b5475589d63447669a7319aba5923519d80a1a16
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6f9614e78691b8c6a0d09ef2247d18e42bbb09e63855e49cdeb0803870071814dde4512bd0c9f21d0ba18b62eadfd74b4346f767e36dc54f92454c0f12c7b59a
|
7
|
+
data.tar.gz: fceac62168a01353bd64daa27055a24c9e5350c7266e853f5289bf643f3a8bedd7ca2302fbb7d94411604132b718eba9df4050c2dc053bcd703f6aeba3ba2e83
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Chris Gahan
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Writeme!
|
data/bin/wp
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'cgi'
|
4
|
+
|
5
|
+
###########################################################
|
6
|
+
# TODOs:
|
7
|
+
# [ ] Handle redirects (in Wiki#article)
|
8
|
+
# [ ] Render templates
|
9
|
+
# [ ] Remove "edit" links
|
10
|
+
# [ ] Don't break on pages with & in them
|
11
|
+
# [x] handle redirects
|
12
|
+
# [ ] store redirects in leveldb
|
13
|
+
# (update leveldb as the redirects are discovered)
|
14
|
+
# [ ] Random titles (lookup 30 random letter pairs)
|
15
|
+
|
16
|
+
###########################################################
|
17
|
+
|
18
|
+
# Pipes an HTML string into the w3m text browser so it renders in the terminal.
#
# html - the HTML document to display (written to w3m's stdin).
def console_viewer(html)
  # Alternative viewer: IO.popen("lynx -stdin", "w")
  IO.popen("w3m -T text/html", "w") do |browser|
    browser.puts(html)
  end
end
|
24
|
+
|
25
|
+
###########################################################
|
26
|
+
|
27
|
+
require 'slop' # lazy loaded

# Command-line options. Only --server changes the control flow below; the
# remaining switches are parsed but nothing in this script acts on them yet.
opts = Slop.parse(help: true, strict: true) do
  banner 'Usage: wp [options] <query...>'

  on 's', 'server', 'Launch server'
  on 'p=', 'port', 'Webserver port (default: 4567)'
  on 'd', 'download', 'Download the latest dump'
  # on 'u', 'update', 'Update the database (takes a LONG time)'
  on 'i=', 'import', 'Import dump'
  on 'o=', 'outfile', 'Where to output the file'
  on 'c', 'config', 'Interactive config'

  # command 'server' do
  # on "p", "port", 'Port (default: 3000)', default: 3000

  # run do |opts, args|
  # server(opts.port)
  # end
  # end
end

###########################################################

require 'wp/wiki'

if opts.server?
  # Run Webserver (the required file starts the Sinatra app when it is loaded)
  require 'wp/webserver'

else
  # CLI interface
  # (The former `reindex?` / `update?` branches were removed: neither option is
  # declared above and both branches were empty.)
  query = ARGV.join(" ")
  wiki = Wiki.new

  titles = wiki.search(query, 20)

  # Nothing to choose from: say so instead of showing an empty menu.
  abort "No titles found for #{query.inspect}" if titles.empty?

  if titles.size == 1
    title = titles.first
  else
    titles.each.with_index do |candidate, i|
      puts "#{i+1}. #{candidate}"
    end

    title = nil

    until title
      print "> "

      answer = STDIN.gets
      # EOF (Ctrl-D) or an empty line cancels the selection.
      exit 1 if answer.nil? || answer.strip.empty?

      if answer.strip =~ /\A(\d+)\z/
        n = $1.to_i - 1
        # Reject "0": index -1 would silently pick the last entry.
        # Out-of-range numbers leave title nil and the prompt repeats.
        title = titles[n] if n >= 0
      end
    end
  end

  a = wiki.article(title)

  # Guard against a failed lookup (nil, or any object that is not an article).
  abort "No article found for #{title.inspect}" unless a.respond_to?(:redirected_from)

  # Titles are plain text, so escape them before embedding them in markup;
  # a.html is already HTML and is passed through untouched.
  safe_title = CGI.escapeHTML(a.title.to_s)

  html = []
  html << "<title>#{safe_title}</title>"
  html << "<center><h1>#{safe_title}</h1></center>"
  html << "<center>(redirected from: <b>#{CGI.escapeHTML(a.redirected_from.to_s)}</b>)</center>" if a.redirected_from
  html << a.html

  console_viewer html.join("\n")
  # ws = Thread.new { require 'wp/webserver' }

  # system("luakit", "http://localhost:4567/#{CGI.escape title}")
end
|
106
|
+
|
data/lib/wp.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
|
2
|
+
require 'wp/wiki'
|
3
|
+
|
4
|
+
#####################################################################
|
5
|
+
|
6
|
+
# Formats a number with thousands separators, e.g. 1234567 => "1,234,567".
#
# thing - anything whose #to_s is a number (Integer, Float, numeric String).
#
# Returns a String. Only the part before the first "." is grouped, so a
# fractional part is left untouched, and a leading sign never gets a comma.
#
# (The previous version called String#each_slice, which core Ruby does not
# define, and produced "-,123" for negative three-digit numbers.)
def commatize(thing)
  whole, dot, fraction = thing.to_s.partition(".")

  # Insert a comma after every digit that is followed by a multiple of three
  # digits running up to the end of the integer part.
  grouped = whole.gsub(/(\d)(?=(?:\d{3})+\z)/, '\1,')

  "#{grouped}#{dot}#{fraction}"
end
|
9
|
+
|
10
|
+
#####################################################################
|
11
|
+
|
12
|
+
# Holder for a lazily created Mechanize agent.
class Downloader

  # Returns the shared Mechanize agent, creating it on first use.
  # The gem is required here so it is only loaded when a download is needed.
  def mech
    require 'mechanize'
    @mech = Mechanize.new unless @mech
    @mech
  end

end
|
20
|
+
|
21
|
+
#####################################################################
|
22
|
+
|
23
|
+
# Pipes an HTML string into lynx so it renders in the terminal.
#
# html - the HTML document to display (written to lynx's stdin).
def console_viewer(html)
  # Alternative viewer: IO.popen("w3m -T text/html", "w")
  IO.popen("lynx -stdin", "w") do |browser|
    browser.puts(html)
  end
end
|
data/lib/wp/config.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'fileutils'
require 'yaml'

require 'epitools'
|
2
|
+
|
3
|
+
module Pedia
  # User configuration, read from ~/.config/pedia/config.yml.
  #
  # Options are declared with the `option` class macro below; values found in
  # the YAML file are attached to the matching declaration and exposed through
  # #options.
  class Config

    # Constants #########################################################

    CONFIG_DIR = File.expand_path "~/.config/pedia"
    CACHE_DIR = File.expand_path "~/.cache/pedia"
    CONFIG_FILE = File.join CONFIG_DIR, "config.yml"

    # DSL ###############################################################

    # One declared setting: its name, current value, default, prompt and type.
    class Option < TypedStruct["name value default prompt type:symbol"]

      # NOTE(review): this reads the @value/@default instance variables; confirm
      # that TypedStruct really stores its members there.
      def value
        @value ||= @default
      end

      # Prepares the option for use. For :dir options this creates the
      # directory (and any missing parents); existing directories are fine.
      def init
        case type
        when :dir
          # File.mkdir does not exist; FileUtils.mkdir_p is the safe equivalent.
          FileUtils.mkdir_p value
        end
      end

    end


    # Declares a configuration option and an accessor of the same name.
    # Arguments are passed straight to Option.new.
    def self.option(*args)
      opt = Option.new(*args)

      attr_accessor opt.name

      @@spec ||= {}
      @@spec[opt.name] = opt
    end

    # Options ###########################################################

    option name: "db_path",
           prompt: "Where should the database be stored (approx. 8 gigs)?",
           default: CACHE_DIR,
           type: :dir

    option name: "xml_path",
           prompt: "Where should the XML be stored (approx. 8 gigs)?",
           default: CACHE_DIR,
           type: :dir


    # Methods ###########################################################

    # Loads CONFIG_FILE and keeps every entry that matches a declared option.
    #
    # filename - currently unused; the path is always CONFIG_FILE.
    #
    # A missing, empty or non-mapping config file yields no options instead of
    # raising.
    def initialize(filename="config.yml")
      FileUtils.mkdir_p CONFIG_DIR

      yaml = File.exist?(CONFIG_FILE) ? YAML.load_file(CONFIG_FILE) : nil
      yaml = {} unless yaml.is_a?(Hash)

      @options = {}

      yaml.each do |key, val|
        if opt = @@spec[key]
          # NOTE(review): relies on Option#with coming from TypedStruct/epitools.
          @options[key] = opt.with(value: val)
        end
      end
    end


    # Hash of option name => Option for the settings found in the config file.
    attr_reader :options

  end # class Config
end # module Pedia
|
data/lib/wp/old/wiki.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'leveldb'
|
2
|
+
|
3
|
+
# https://github.com/nricciar/wikicloth
|
4
|
+
require 'wikicloth'
|
5
|
+
|
6
|
+
# Legacy key/value-backed wiki reader (kept under old/).
#
# NOTE(review): @db is never assigned anywhere in this class, so every method
# that touches it will fail on nil; Zlib is also used without being required
# in this file.
class Wiki

  # path - accepted but not used.
  def initialize(path="enwiki")
  end

  # Fetches the record stored under key and inflates it.
  def [](key)
    compressed = @db.get(key)
    Zlib.inflate(compressed)
  end

  # Returns up to max titles (a search with an empty prefix).
  def titles(max=30)
    search("", max)
  end

  # Returns up to max keys starting with prefix, as reported by @db.fwmkeys.
  def search(prefix, max=30)
    @db.fwmkeys(prefix, max)
  end

  # Renders the stored wikitext for title as HTML.
  def html(title)
    wikitext = self[title]
    WikiCloth::Parser.new(data: wikitext).to_html
  end

end
|
28
|
+
|
29
|
+
|
30
|
+
# When run directly (not required), print the first batch of titles.
if __FILE__ == $PROGRAM_NAME
  pp Wiki.new.titles
end
|
data/lib/wp/old/xmlreader.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# <title>AccessibleComputing</title>
|
4
|
+
# <ns>0</ns>
|
5
|
+
# <id>10</id>
|
6
|
+
# <redirect title="Computer accessibility" />
|
7
|
+
# <revision>
|
8
|
+
# <sha1>lo15ponaybcg2sf49sstw9gdjmdetnk</sha1>
|
9
|
+
# <format>text/x-wiki</format>
|
10
|
+
# <text>...ARTICLE...</text>
|
11
|
+
# </revision>
|
12
|
+
|
13
|
+
|
14
|
+
# Streams <page> records out of a MediaWiki XML dump.
#
# Each record is yielded as a Hash that may contain the keys "title", "text"
# and "redirect".
class XMLReader

  include Enumerable

  # filename - path to the dump; names ending in ".bz2" are decompressed
  #            on the fly through `bunzip2 -c`.
  def initialize(filename)
    io = if filename =~ /\.bz2\z/
      IO.popen ["bunzip2", "-c", filename]
    else
      # File.open rather than Kernel#open: a path must never be run as a command.
      File.open(filename)
    end

    @reader = Nokogiri::XML::Reader io
  end

  # Yields one Hash per page as soon as the page node is seen in a non-opening
  # state. Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    info = {}

    @reader.each do |node|

      case node.name
      when "page"
        if node.open?
          info = {} # start collecting a fresh record
        else
          yield info
        end
      when "title", "text"
        info[node.name] = node.text if node.open?
      when "redirect"
        info["redirect"] = node.attribute("title")
      end

    end
  end

end
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
# Convenience predicates added to Nokogiri's streaming reader nodes.
class Nokogiri::XML::Reader
  # True when the cursor is on an element's start tag.
  def open?
    Nokogiri::XML::Reader::TYPE_ELEMENT == node_type
  end

  # True when the cursor is on an element's end tag.
  def closed?
    Nokogiri::XML::Reader::TYPE_END_ELEMENT == node_type
  end

  # The node's inner XML, exposed under a shorter name.
  def text
    inner_xml
  end
end
|
67
|
+
|
data/lib/wp/webserver.rb
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
require 'wp/wiki'
|
3
|
+
require "sinatra/reloader" # if development?
|
4
|
+
|
5
|
+
$wiki = Wiki.new

# Sinatra front-end for browsing the local wiki.
#
# Routes:
#   GET /          - 20 titles starting from a random capital letter
#   GET /search?q= - titles starting from the query
#   GET /<title>   - the rendered article
#
# The server is started at the bottom of the class body (run!), i.e. as soon
# as this file is loaded.
class Webserver < Sinatra::Base

  # The shared Wiki instance.
  def wiki
    $wiki
  end

  # HTML-escapes arbitrary text for safe interpolation into markup.
  def h(text)
    Rack::Utils.escape_html(text.to_s)
  end

  # Percent-encodes a title for use as a URL path; "/" is kept so titles
  # containing slashes still match the catch-all article route.
  def url_escape(title)
    title.to_s.gsub(%r{[^A-Za-z0-9\-._~/]}) do |ch|
      ch.bytes.map { |byte| "%%%02X" % byte }.join
    end
  end

  # Builds a link to an article.
  #
  # title - article title (plain text).
  # desc  - optional link body, already HTML; defaults to the escaped title.
  def link_to(title, desc=nil)
    desc ||= h(title)
    "<a href=\"/#{url_escape title}\">#{desc}</a>"
  end

  # Returns title as HTML with every occurrence of query highlighted.
  # A nil or empty query leaves the (escaped) title unchanged.
  def highlight(title, query)
    return h(title) if query.nil? || query.empty?

    marked = "<span style='background: yellow'>#{h query}</span>"

    # Split on the literal query, escape the surrounding pieces, then stitch
    # them back together with the highlighted match in between.
    title.split(/#{Regexp.escape(query)}/, -1).map { |piece| h(piece) }.join(marked)
  end

  # The search form shown at the top of every page.
  def header(query="")
    %{
      <form action="/search">
        <input type="text" name="q" value="#{h query}">
        <input type="submit" value="search">
      </form>
    }
  end

  get "/" do
    letter = ('A'..'Z').to_a.sample
    titles = wiki.search(letter, 20)

    links = titles.map { |title| "<li>#{link_to title}</li>" }

    %{
      #{header}

      <h1>#{letter}</h1>
      <ul>
        #{links.join "\n"}
      </ul>
    }
  end

  get "/search" do
    query = params[:q].to_s # a missing parameter behaves like an empty query
    titles = wiki.search query

    links = titles.map do |title|
      desc = highlight title, query
      "<li>#{link_to title, desc}</li>"
    end

    %{
      #{header query }

      <h1>Query: #{h query}</h1>
      <ul>
        #{links.join "\n"}
      </ul>
    }
  end

  get %r{/(.+)} do
    title = params[:captures].first

    if article = wiki.article(title)
      # Debug dump of the source XML; "--" is broken up so the content cannot
      # terminate the surrounding HTML comment early.
      raw_xml = article.xml.to_s.gsub("--", "- -")

      %{
        #{header article.title}

        <h1>#{h article.title}</h1>

        #{article.html}

        <!--
        #{raw_xml}
        -->
      }
    else
      status 404
      "No article found."
    end
  end

  run!

end
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
# WikiCloth parser configuration used when rendering articles.
# Declared with the same name and body as the WikiParser in wp/wiki.rb.
class WikiParser < WikiCloth::Parser

  # Internal wiki links open a JavaScript alert naming the page.
  url_for { |page| "javascript:alert('You clicked on: #{page}');" }

  # Every internal link gets its href from url_for.
  link_attributes_for { |page| { :href => url_for(page) } }

  # Only the "hello" template is defined; other names yield nil.
  template { |name| "Hello {{{1}}}" if name == "hello" }

  # External links open in a new window; the URL doubles as the label when no
  # link text is given.
  external_link do |url, text|
    label = text.blank? ? url : text
    "<a href=\"#{url}\" target=\"_blank\" class=\"exlink\">#{label}</a>"
  end

end
|
111
|
+
|
112
|
+
# @wiki = WikiParser.new({
|
113
|
+
# :params => { "PAGENAME" => "Testing123" },
|
114
|
+
# :data => "{{hello|world}} From {{ PAGENAME }} -- [www.google.com]";
|
115
|
+
# })
|
data/lib/wp/wiki.rb
ADDED
@@ -0,0 +1,189 @@
|
|
1
|
+
require 'leveldb'
|
2
|
+
require 'snappy'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'wikicloth'
|
5
|
+
|
6
|
+
#####################################################################
|
7
|
+
|
8
|
+
# Decompresses (part of) a bzip2 file through an external `bunzip2 -c` process.
#
# thing  - path of the compressed file.
# offset - byte offset to start reading at (nil: current position, i.e. start).
# length - number of compressed bytes to feed to bunzip2 (nil: until EOF).
#
# Yields an IO from which the decompressed data can be read.
# Returns the value of the block.
def bz2_stream(thing, offset=nil, length=nil)
  require 'open3'

  open(thing, "rb") do |bz2|

    Open3.popen2("bunzip2", "-c") do |inp, outp, _wait_thread|
      # Feed the compressed bytes from a separate thread so the caller can read
      # the decompressed output at the same time (avoids a pipe deadlock).
      Thread.new do
        begin
          IO.copy_stream(bz2, inp, length, offset)
        rescue Errno::EPIPE, IOError
          # The consumer stopped reading before all input was written;
          # there is nothing left to do.
        ensure
          # Always signal EOF to bunzip2, even if the copy failed.
          inp.close unless inp.closed?
        end
      end

      yield outp
    end

  end

end
|
25
|
+
|
26
|
+
#####################################################################
|
27
|
+
|
28
|
+
# One <page> from the dump.
#
# Members:
#   title           - the page title
#   text            - wikitext body (nil for redirects or pages without <text>)
#   redirect        - title of the redirect target (nil for normal pages)
#   xml             - the node the article was built from
#   redirected_from - set by the caller when the article was reached through a
#                     redirect
class Article < Struct.new(:title, :text, :redirect, :xml, :redirected_from)

  # page - a node responding to #at(name); the nodes it returns respond to
  #        #text and #[] (attribute lookup).
  #
  # Missing <title> or <text> children leave the member nil instead of raising.
  def initialize(page)
    self.xml = page
    self.title = child_text(page, "title")

    target = page.at("redirect")

    if target
      self.redirect = target["title"]
    else
      self.text = child_text(page, "text")
    end
  end


  # True when this page is a redirect to another title.
  def redirect?
    !!redirect
  end


  # Renders the wikitext as HTML (an absent body renders as empty text).
  def to_html
    # WikiCloth::Parser.new(data: text).to_html
    WikiParser.new(data: text.to_s).to_html
  end

  alias_method :html, :to_html

  private

  # Text content of the named child, or nil when the child is absent.
  def child_text(page, name)
    node = page.at(name)
    node && node.text
  end

end
|
55
|
+
|
56
|
+
#####################################################################
|
57
|
+
|
58
|
+
# Parses a chunk of dump XML and enumerates the pages in it as Articles.
class XMLReader

  include Enumerable

  # input - XML text (or IO) containing zero or more <page> elements; it is
  #         parsed as a fragment, so no single root element is required.
  def initialize(input)
    @doc = Nokogiri::XML.fragment input
  end


  # Yields an Article for every <page> found.
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    @doc.search("page").each do |page|
      yield Article.new(page)
    end
  end

end
|
74
|
+
|
75
|
+
#####################################################################
|
76
|
+
|
77
|
+
# Access to a local Wikipedia dump through a LevelDB index.
#
# Index layout (written by #import_index):
#   "title/<title>"   => byte offset of the bzip2 stream holding the article
#   "length/<offset>" => compressed length of the stream starting at <offset>
class Wiki
  #@@index_url = "http://dumps.wikimedia.org/enwiki/20130604/enwiki-20130604-pages-articles-multistream-index.txt.bz2"
  @@index_url = "/d/wiki/enwiki-20130604-pages-articles-multistream-index.txt.bz2"
  @@articles_url = "/d/wiki/enwiki-20130604-pages-articles-multistream.xml.bz2"

  # Upper bound on redirect hops followed by #article (protects against loops).
  MAX_REDIRECT_HOPS = 10

  def initialize
  end


  # The LevelDB index, opened on first use.
  def db
    @db ||= LevelDB::DB.new File.expand_path("~/.cache/wp/enwiki-index")
  end


  # Returns up to amount titles, in index order, starting at the first title
  # that sorts at or after the given one.
  #
  # NOTE(review): results are not filtered by prefix, so titles that merely
  # sort after the query are returned as well; confirm this is intended.
  def search(title, amount=40)
    db.each(from: "title/#{title}").take(amount).map do |key, _offset|
      key.split("/", 2).last
    end
  end

  # Returns up to amount titles chosen by seeking to random two-character
  # prefixes (an uppercase letter or digit followed by a lowercase letter or
  # digit).
  #
  # A seek that runs past the last title falls back to the first title, so the
  # result has amount entries whenever the index contains any titles.
  def random(amount=30)
    letters = [*'A'..'Z'] + [*'0'..'9']

    picks = (1..amount).map do
      prefix = letters.sample + letters.sample.downcase

      # Seek inside the "title/" keyspace; a bare prefix would land on the
      # "length/" entries that sort after every capital letter.
      pair = db.each(from: "title/#{prefix}").first || db.each(from: "title/").first
      key = pair && pair.first

      key.split("/", 2).last if key && key.start_with?("title/")
    end

    picks.compact
  end

  # Looks up an article by title, following redirects.
  #
  # title           - article title, with or without a leading "title/".
  # redirected_from - title that redirected here (used when following
  #                   redirects; callers normally omit it).
  # hops            - number of redirects followed so far (internal).
  #
  # Returns the Article, or nil when the title is not indexed, is not found in
  # its stream, or the redirect chain exceeds MAX_REDIRECT_HOPS.
  def article(title, redirected_from=nil, hops=0)
    title = title.to_s
    title = $1 if title =~ %r{\Atitle/(.+)\z}m

    return nil unless offset = db["title/#{title}"]

    offset = offset.to_i

    # The last stream in the dump has no recorded length; nil makes
    # bz2_stream read through to the end of the file.
    stored_length = db["length/#{offset}"]
    length = stored_length && stored_length.to_i

    found = bz2_stream(@@articles_url, offset, length) do |io|
      XMLReader.new(io.read).find { |candidate| candidate.title == title }
    end

    return nil unless found

    if found.redirect?
      return nil if hops >= MAX_REDIRECT_HOPS
      article(found.redirect, title, hops + 1)
    else
      found.redirected_from = redirected_from
      found
    end
  end

  # Builds the LevelDB index from a multistream index file, whose lines have
  # the form "<offset>:<n>:<title>".
  #
  # index_url - path of the bzip2-compressed index file.
  #
  # Progress is printed on a single, continuously rewritten terminal line.
  def import_index(index_url=@@index_url)
    bz2_stream(index_url) do |io|

      last_offset = nil

      io.each_line.with_index do |line, i|
        offset, _n, title = line.chomp.split(":", 3)
        next if title.nil? # malformed line

        db["title/#{title}"] = offset

        offset = offset.to_i
        last_offset ||= offset

        # A new offset means the previous stream has ended, so its length is
        # now known.
        if last_offset != offset
          db["length/#{last_offset}"] = (offset - last_offset).to_s
          last_offset = offset
        end

        print "\e[1G#{group_digits i} - #{title}\e[J" if i % 11337 == 0
      end

    end
  end

  private

  # Formats a non-negative integer with thousands separators (1234 => "1,234").
  def group_digits(number)
    number.to_s.gsub(/(\d)(?=(?:\d{3})+\z)/, '\1,')
  end

end
|
166
|
+
|
167
|
+
#####################################################################
|
168
|
+
|
169
|
+
# WikiCloth parser configuration used by Article#to_html.
class WikiParser < WikiCloth::Parser

  # Internal wiki links open a JavaScript alert naming the page.
  url_for { |page| "javascript:alert('You clicked on: #{page}');" }

  # Every internal link gets its href from url_for.
  link_attributes_for { |page| { :href => url_for(page) } }

  # Only the "hello" template is defined; other names yield nil.
  template { |name| "Hello {{{1}}}" if name == "hello" }

  # External links open in a new window; when no link text is given the URL
  # itself is shown.
  external_link do |url, text|
    label = text.blank? ? url : text
    "<a href=\"#{url}\" target=\"_blank\" class=\"exlink\">#{label}</a>"
  end

end
|
188
|
+
|
189
|
+
#####################################################################
|
metadata
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- epitron
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-07-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: slop
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: leveldb-ruby
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: snappy
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: sinatra
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: wikicloth
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: sinatra-contrib
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Imports Wikipedia dumps to a local database and lets you browse them
|
98
|
+
from the commandline or with a local sinatra server.
|
99
|
+
email: chris@ill-logic.com
|
100
|
+
executables:
|
101
|
+
- wp
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files:
|
104
|
+
- README.md
|
105
|
+
- LICENSE
|
106
|
+
files:
|
107
|
+
- bin/wp
|
108
|
+
- lib/wp.rb
|
109
|
+
- lib/wp/wiki.rb
|
110
|
+
- lib/wp/webserver.rb
|
111
|
+
- lib/wp/config.rb
|
112
|
+
- lib/wp/old/wiki.rb
|
113
|
+
- lib/wp/old/xmlreader.rb
|
114
|
+
- README.md
|
115
|
+
- LICENSE
|
116
|
+
homepage: http://github.com/epitron/wp/
|
117
|
+
licenses:
|
118
|
+
- WTFPL
|
119
|
+
metadata: {}
|
120
|
+
post_install_message:
|
121
|
+
rdoc_options: []
|
122
|
+
require_paths:
|
123
|
+
- lib
|
124
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - '>='
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0'
|
129
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
requirements: []
|
135
|
+
rubyforge_project:
|
136
|
+
rubygems_version: 2.0.3
|
137
|
+
signing_key:
|
138
|
+
specification_version: 4
|
139
|
+
summary: A local Wikipedia with commandline and web interfaces.
|
140
|
+
test_files: []
|
141
|
+
has_rdoc:
|