wp 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +20 -0
- data/README.md +1 -0
- data/bin/wp +106 -0
- data/lib/wp.rb +28 -0
- data/lib/wp/config.rb +79 -0
- data/lib/wp/old/wiki.rb +33 -0
- data/lib/wp/old/xmlreader.rb +67 -0
- data/lib/wp/webserver.rb +115 -0
- data/lib/wp/wiki.rb +189 -0
- metadata +141 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ab7e33f36703b9dad5ab221b6232041779eca3a5
|
4
|
+
data.tar.gz: b5475589d63447669a7319aba5923519d80a1a16
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6f9614e78691b8c6a0d09ef2247d18e42bbb09e63855e49cdeb0803870071814dde4512bd0c9f21d0ba18b62eadfd74b4346f767e36dc54f92454c0f12c7b59a
|
7
|
+
data.tar.gz: fceac62168a01353bd64daa27055a24c9e5350c7266e853f5289bf643f3a8bedd7ca2302fbb7d94411604132b718eba9df4050c2dc053bcd703f6aeba3ba2e83
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Chris Gahan
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Writeme!
|
data/bin/wp
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'cgi'
|
4
|
+
|
5
|
+
###########################################################
|
6
|
+
# TODOs:
|
7
|
+
# [ ] Handle redirects (in Wiki#article)
|
8
|
+
# [ ] Render templates
|
9
|
+
# [ ] Remove "edit" links
|
10
|
+
# [ ] Don't break on pages with & in them
|
11
|
+
# [x] handle redirects
|
12
|
+
# [ ] store redirects in leveldb
|
13
|
+
# (update leveldb as the redirects are discovered)
|
14
|
+
# [ ] Random titles (lookup 30 random letter pairs)
|
15
|
+
|
16
|
+
###########################################################
|
17
|
+
|
18
|
+
# Show an HTML string in the terminal.
#
# Starts `w3m -T text/html` as a child process and writes +html+ to its
# standard input. ("lynx -stdin" is the alternative viewer used in lib/wp.rb.)
def console_viewer(html)
  IO.popen("w3m -T text/html", "w") { |browser| browser.puts html }
end
|
24
|
+
|
25
|
+
###########################################################
|
26
|
+
|
27
|
+
require 'slop' # lazy loaded
|
28
|
+
|
29
|
+
# Command-line option definitions (Slop DSL).
# A short name ending in "=" marks an option that takes an argument.
opts = Slop.parse(help: true, strict: true) do
  banner 'Usage: wp [options] <query...>'

  on('s',  'server',   'Launch server')
  on('p=', 'port',     'Webserver port (default: 4567)')
  on('d',  'download', 'Download the latest dump')
  # An 'u'/'update' option ("Update the database") is planned but not enabled.
  on('i=', 'import',   'Import dump')
  on('o=', 'outfile',  'Where to output the file')
  on('c',  'config',   'Interactive config')

  # A dedicated `server` sub-command (with its own port option) is sketched
  # for later; for now the server is selected with the -s flag above.
end
|
48
|
+
|
49
|
+
###########################################################
|
50
|
+
|
51
|
+
require 'wp/wiki'
|
52
|
+
|
53
|
+
if opts.server?
  # Run Webserver (loading the file defines the Sinatra app and calls run!)
  require 'wp/webserver'

elsif opts.reindex?
  # Not implemented yet.

elsif opts.update?
  # Not implemented yet.

else
  # CLI interface: search for the query, let the user pick a title when the
  # result is ambiguous, then render the article in the console viewer.
  query  = ARGV.join(" ")
  wiki   = Wiki.new
  titles = wiki.search(query, 20)

  abort "No titles found for: #{query}" if titles.empty?

  if titles.size == 1
    title = titles.first
  else
    titles.each.with_index do |candidate, i|
      puts "#{i+1}. #{candidate}"
    end

    title = nil

    until title
      print "> "
      input = STDIN.gets

      # EOF (closed stdin) or a blank line cancels the selection.
      exit 1 if input.nil? || input.strip.empty?

      if input.strip =~ /\A(\d+)\z/
        n = $1.to_i
        # Only accept numbers that were actually listed; "0" used to wrap
        # around to the last title and larger numbers produced a nil title.
        title = titles[n - 1] if n.between?(1, titles.size)
      end

      puts "Please enter a number from 1 to #{titles.size}." unless title
    end
  end

  a = wiki.article(title)
  abort "No article found: #{title}" unless a

  # Titles are plain text, so escape them before embedding them in markup
  # (e.g. titles containing "&" or "<").
  html = []
  html << "<title>#{CGI.escapeHTML a.title}</title>"
  html << "<center><h1>#{CGI.escapeHTML a.title}</h1></center>"
  html << "<center>(redirected from: <b>#{CGI.escapeHTML a.redirected_from}</b>)</center>" if a.redirected_from
  html << a.html

  console_viewer html.join("\n")

  # Alternative idea: start the webserver in a thread and open a graphical
  # browser (e.g. luakit) on http://localhost:4567/<escaped title>.
end
|
106
|
+
|
data/lib/wp.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
|
2
|
+
require 'wp/wiki'
|
3
|
+
|
4
|
+
#####################################################################
|
5
|
+
|
6
|
+
# Format a number with thousands separators, e.g. 1234567 => "1,234,567".
#
# thing - anything whose #to_s is a number (Integer, numeric String, ...).
#
# Only the leading run of digits is grouped: an optional leading "-" and
# anything after the digits (such as a decimal fraction) are kept as-is.
#
# Returns a String.
def commatize(thing)
  sign, digits, rest = thing.to_s.match(/\A(-?)(\d*)(.*)\z/m).captures

  # Group from the right: reverse, cut into runs of up to three digits,
  # join with commas and reverse back.
  grouped = digits.reverse.scan(/\d{1,3}/).join(",").reverse

  "#{sign}#{grouped}#{rest}"
end
|
9
|
+
|
10
|
+
#####################################################################
|
11
|
+
|
12
|
+
# Holder for a lazily created Mechanize agent.
class Downloader

  # Returns the shared Mechanize instance, loading the mechanize library
  # and creating the agent the first time it is needed.
  def mech
    return @mech if @mech

    require 'mechanize'
    @mech = Mechanize.new
  end

end
|
20
|
+
|
21
|
+
#####################################################################
|
22
|
+
|
23
|
+
# Show an HTML string in the terminal.
#
# Starts `lynx -stdin` as a child process and writes +html+ to its standard
# input. ("w3m -T text/html" is the alternative viewer used in bin/wp.)
def console_viewer(html)
  IO.popen("lynx -stdin", "w") { |browser| browser.puts html }
end
|
data/lib/wp/config.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'epitools'
|
2
|
+
|
3
|
+
module Pedia

  # User configuration, read from a YAML file under CONFIG_DIR.
  #
  # Options are declared with the class-level `option` DSL; only keys that
  # were declared that way are picked up from the YAML file.
  class Config

    # Constants #########################################################

    CONFIG_DIR  = File.expand_path "~/.config/pedia"
    CACHE_DIR   = File.expand_path "~/.cache/pedia"
    CONFIG_FILE = File.join CONFIG_DIR, "config.yml"

    # DSL ###############################################################

    # One declared setting: its name, current value, default, the prompt
    # shown to the user and a type tag (e.g. :dir).
    class Option < TypedStruct["name value default prompt type:symbol"]

      # The stored value, or the option's default when none was set.
      # (Struct members are not instance variables, so the value has to be
      # read through the generated accessor rather than @value/@default.)
      def value
        stored = super()
        stored.nil? ? default : stored
      end

      # Prepare whatever the option refers to; for :dir options this makes
      # sure the directory (and its parents) exist.
      def init
        case type
        when :dir
          require 'fileutils' # lazy loaded
          FileUtils.mkdir_p value
        end
      end

    end

    # Registry of declared options, keyed by option name.
    @@spec = {}

    # Declare an option. Accepts the same arguments as Option.new, defines
    # an accessor named after the option and records it in the registry.
    def self.option(*args)
      opt = Option.new(*args)

      attr_accessor opt.name

      @@spec[opt.name] = opt
    end

    # Options ###########################################################

    option name:    "db_path",
           prompt:  "Where should the database be stored (approx. 8 gigs)?",
           default: CACHE_DIR,
           type:    :dir

    option name:    "xml_path",
           prompt:  "Where should the XML be stored (approx. 8 gigs)?",
           default: CACHE_DIR,
           type:    :dir

    # Methods ###########################################################

    # The options found in the config file, keyed by option name.
    attr_reader :options

    # Load the configuration.
    #
    # filename - config file name, resolved relative to CONFIG_DIR (an
    #            absolute path is used as-is). Defaults to "config.yml",
    #            i.e. CONFIG_FILE.
    #
    # A missing or empty file is treated as "no options set".
    def initialize(filename="config.yml")
      require 'fileutils' # lazy loaded
      require 'yaml'

      FileUtils.mkdir_p CONFIG_DIR

      path = File.expand_path(filename, CONFIG_DIR)
      yaml = File.exist?(path) ? YAML.load_file(path) : nil
      yaml = {} unless yaml.is_a? Hash # empty file or unexpected top-level type

      @options = {}

      yaml.each do |key, val|
        if opt = @@spec[key]
          @options[key] = opt.with(value: val)
        end
      end
    end

  end # class Config
end # module Pedia
|
data/lib/wp/old/wiki.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'leveldb'
|
2
|
+
|
3
|
+
# https://github.com/nricciar/wikicloth
|
4
|
+
require 'wikicloth'
|
5
|
+
|
6
|
+
# Legacy article store: looks up compressed wikitext by title and renders
# it with WikiCloth.
#
# NOTE(review): nothing in this class assigns @db, and the +path+ argument
# of #initialize is unused, so the database handle must come from
# elsewhere -- confirm before relying on this class.
class Wiki

  def initialize(path="enwiki")
  end

  # Fetch the entry stored under +key+ and inflate it with Zlib.
  def [](key)
    Zlib.inflate(@db.get(key))
  end

  # The first +max+ keys (a search with an empty prefix).
  def titles(max=30)
    search "", max
  end

  # Up to +max+ keys for +prefix+, as returned by the database's fwmkeys.
  def search(prefix, max=30)
    @db.fwmkeys prefix, max
  end

  # Render the article stored under +title+ as HTML.
  def html(title)
    parser = WikiCloth::Parser.new(data: self[title])
    parser.to_html
  end

end
|
28
|
+
|
29
|
+
|
30
|
+
# When this file is run directly (not required), print the first titles.
if __FILE__ == $0
  pp Wiki.new.titles
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# <title>AccessibleComputing</title>
|
4
|
+
# <ns>0</ns>
|
5
|
+
# <id>10</id>
|
6
|
+
# <redirect title="Computer accessibility" />
|
7
|
+
# <revision>
|
8
|
+
# <sha1>lo15ponaybcg2sf49sstw9gdjmdetnk</sha1>
|
9
|
+
# <format>text/x-wiki</format>
|
10
|
+
# <text>...ARTICLE...</text>
|
11
|
+
# </revision>
|
12
|
+
|
13
|
+
|
14
|
+
# Streams <page> records out of a MediaWiki XML dump.
#
# Each yielded record is a Hash with the keys "title" and "text" (inner XML
# of those elements) and, for redirect pages, "redirect" (the value of the
# <redirect> element's title attribute).
class XMLReader

  include Enumerable

  # filename - path of the dump; names containing ".bz2" are piped through
  #            `bunzip2 -c`, anything else is read directly.
  def initialize(filename)
    io = if filename[/\.bz2/]
      IO.popen ["bunzip2", "-c", filename]
    else
      # File.open rather than Kernel#open: a plain file is all that is
      # wanted here, and Kernel#open would run a command for names
      # starting with "|".
      File.open(filename)
    end

    @reader = Nokogiri::XML::Reader(io)
  end

  # Yield one Hash per page; returns an Enumerator when no block is given.
  def each
    return enum_for(:each) unless block_given?

    info = {}

    @reader.each do |node|

      case node.name
      when "page"
        if node.open?
          info = {}   # opening <page>: start a fresh record
        else
          yield info  # closing </page>: the record is complete
        end
      when "title", "text"
        info[node.name] = node.text if node.open?
      when "redirect"
        info["redirect"] = node.attribute("title")
      end

    end
  end

end
|
50
|
+
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
# Convenience helpers added to Nokogiri's streaming reader.
class Nokogiri::XML::Reader

  # True when the current node is an opening element.
  def open?
    TYPE_ELEMENT == node_type
  end

  # True when the current node is a closing element.
  def closed?
    TYPE_END_ELEMENT == node_type
  end

  # The node's content, taken from inner_xml.
  def text
    inner_xml
  end

end
|
67
|
+
|
data/lib/wp/webserver.rb
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'sinatra/base'
|
2
|
+
require 'wp/wiki'
|
3
|
+
require "sinatra/reloader" # if development?
|
4
|
+
|
5
|
+
$wiki = Wiki.new
|
6
|
+
|
7
|
+
# Sinatra app for browsing the local wiki: a random-letter index page, a
# title search and an article view. The app starts itself (run!) as soon
# as the class body has been evaluated.
class Webserver < Sinatra::Base

  # The shared Wiki instance.
  def wiki
    $wiki
  end

  # Escape +text+ for safe inclusion in HTML text or attribute values.
  def h(text)
    Rack::Utils.escape_html(text.to_s)
  end

  # Build the URL path of an article. Everything except unreserved
  # characters and "/" is percent-encoded byte by byte, so titles
  # containing "&", "?", "#", "%", quotes or spaces produce working links.
  def path_for(title)
    encoded = title.to_s.gsub(%r{[^a-zA-Z0-9_\-.~/]}) do |char|
      char.bytes.map { |byte| "%%%02X" % byte }.join
    end

    "/#{encoded}"
  end

  # An <a> tag pointing at the article +title+.
  #
  # desc - optional link body, inserted as HTML (callers must escape it);
  #        defaults to the escaped title.
  def link_to(title, desc=nil)
    desc ||= h(title)
    "<a href=\"#{path_for title}\">#{desc}</a>"
  end

  # HTML for +title+ with every occurrence of +query+ highlighted.
  # Both the highlighted and the surrounding text are HTML-escaped; a nil
  # or empty query highlights nothing.
  def highlight(title, query)
    return h(title) if query.nil? || query.empty?

    # Splitting on a capturing pattern keeps the matches in the result.
    pieces = title.split(/(#{Regexp.escape query})/)

    pieces.map do |piece|
      if piece == query
        "<span style='background: yellow'>#{h piece}</span>"
      else
        h(piece)
      end
    end.join
  end

  # The search form shown at the top of every page, pre-filled with +query+.
  # The action is absolute so it also works from article pages whose path
  # contains a "/".
  def header(query="")
    %{
      <form action="/search">
        <input type="text" name="q" value="#{h query}">
        <input type="submit" value="search">
      </form>
    }
  end

  # Index: 20 titles starting with a random capital letter.
  get "/" do
    letter = ('A'..'Z').to_a.sample
    titles = wiki.search(letter, 20)

    links = titles.map { |title| "<li>#{link_to title}</li>" }

    %{
      #{header}

      <h1>#{letter}</h1>
      <ul>
        #{links.join "\n"}
      </ul>
    }
  end

  # Search results for the "q" parameter (treated as "" when missing).
  get "/search" do
    query  = params[:q].to_s
    titles = wiki.search query

    links = titles.map do |title|
      desc = highlight title, query
      "<li>#{link_to title, desc}</li>"
    end

    %{
      #{header query}

      <h1>Query: #{h query}</h1>
      <ul>
        #{links.join "\n"}
      </ul>
    }
  end

  # Article view: everything after the leading "/" is the title.
  get %r{/(.+)} do
    title = params[:captures].first

    if article = wiki.article(title)
      %{
        #{header article.title}

        <h1>#{h article.title}</h1>

        #{article.html}

        <!--
          #{article.xml.to_s}
        -->
      }
    else
      "No article found."
    end
  end

  run!

end
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
# WikiCloth parser customised through WikiCloth's class-level hooks.
class WikiParser < WikiCloth::Parser

  # Internal wiki links currently just pop up an alert naming the page.
  url_for { |page| "javascript:alert('You clicked on: #{page}');" }

  # Link attributes: only an href, built with the url_for hook.
  link_attributes_for { |page| { :href => url_for(page) } }

  # Only the "hello" template is known; every other template yields nil.
  template { |template| template == "hello" ? "Hello {{{1}}}" : nil }

  # External links open in a new window; the URL doubles as the label
  # when no link text was given.
  external_link do |url,text|
    label = text.blank? ? url : text
    "<a href=\"#{url}\" target=\"_blank\" class=\"exlink\">#{label}</a>"
  end

end
|
111
|
+
|
112
|
+
# @wiki = WikiParser.new({
|
113
|
+
# :params => { "PAGENAME" => "Testing123" },
|
114
|
+
# :data => "{{hello|world}} From {{ PAGENAME }} -- [www.google.com]";
|
115
|
+
# })
|
data/lib/wp/wiki.rb
ADDED
@@ -0,0 +1,189 @@
|
|
1
|
+
require 'leveldb'
|
2
|
+
require 'snappy'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'wikicloth'
|
5
|
+
|
6
|
+
#####################################################################
|
7
|
+
|
8
|
+
# Decompress (part of) a bzip2 file through an external `bunzip2 -c`.
#
# thing  - what to open for reading (passed to Kernel#open in binary mode).
#          NOTE(review): Kernel#open treats a leading "|" as a command;
#          only pass trusted paths.
# offset - byte position to start copying from (nil = current position).
# length - number of compressed bytes to feed to bunzip2 (nil = until EOF).
#
# Yields an IO from which the decompressed data can be read, and returns
# the block's value.
def bz2_stream(thing, offset=nil, length=nil)
  require 'open3'

  open(thing, "rb") do |bz2|

    Open3.popen2("bunzip2", "-c") do |inp, outp, _wait_thread|
      # Feed the compressed bytes from a separate thread so the caller can
      # read the output at the same time (avoids a pipe deadlock).
      feeder = Thread.new do
        begin
          IO.copy_stream(bz2, inp, length, offset)
        rescue Errno::EPIPE, IOError
          # The reader went away before everything was written; nothing
          # left to do.
        ensure
          # Always signal EOF to bunzip2, even if the copy failed --
          # otherwise a caller blocked in read would wait forever.
          inp.close unless inp.closed?
        end
      end

      begin
        yield outp
      ensure
        # Stop the feeder before the pipes and the source file get closed,
        # in case the caller left without reading everything.
        feeder.kill
        feeder.join
      end
    end

  end

end
|
25
|
+
|
26
|
+
#####################################################################
|
27
|
+
|
28
|
+
# One page from the dump.
#
# title           - the page title.
# text            - the wikitext (nil for redirect pages).
# redirect        - title of the redirect target (nil for normal pages).
# xml             - the page node the article was built from.
# redirected_from - title the reader was redirected from, set by the caller.
class Article < Struct.new(:title, :text, :redirect, :xml, :redirected_from)

  # page - a node for one <page> element; it must respond to #at(name),
  #        returning a child node (or nil) that responds to #text and #[].
  def initialize(page)
    self.xml   = page
    self.title = page.at("title").text

    # Distinct local names: a local called `redirect` would shadow the
    # struct accessor of the same name.
    if target = page.at("redirect")
      self.redirect = target["title"]
    elsif body = page.at("text")
      self.text = body.text
    end
    # A page with neither element simply keeps text == nil.
  end

  # True when this page is a redirect to another title.
  def redirect?
    !!redirect
  end

  # Render the wikitext as HTML. Redirects (which have no text) render as
  # an empty document instead of handing nil to the parser.
  def to_html
    WikiParser.new(data: text.to_s).to_html
  end

  alias_method :html, :to_html

end
|
55
|
+
|
56
|
+
#####################################################################
|
57
|
+
|
58
|
+
# Parses a chunk of dump XML and enumerates the <page> elements in it as
# Article objects.
class XMLReader

  include Enumerable

  # input - XML text containing zero or more <page> elements (parsed as a
  #         fragment, so no single root element is needed).
  def initialize(input)
    @doc = Nokogiri::XML.fragment input
  end

  # Yield an Article for every page; returns an Enumerator when called
  # without a block.
  def each
    return enum_for(:each) unless block_given?

    @doc.search("page").each do |page|
      yield Article.new(page)
    end
  end

end
|
74
|
+
|
75
|
+
#####################################################################
|
76
|
+
|
77
|
+
# Access to a local Wikipedia "multistream" dump.
#
# A LevelDB index maps "title/<title>" to the byte offset of the bzip2
# stream containing that page, and "length/<offset>" to the compressed
# length of that stream.
class Wiki
  #@@index_url = "http://dumps.wikimedia.org/enwiki/20130604/enwiki-20130604-pages-articles-multistream-index.txt.bz2"
  @@index_url    = "/d/wiki/enwiki-20130604-pages-articles-multistream-index.txt.bz2"
  @@articles_url = "/d/wiki/enwiki-20130604-pages-articles-multistream.xml.bz2"

  # Upper bound on how many redirects #article follows, so that redirect
  # cycles (A -> B -> A) terminate.
  MAX_REDIRECTS = 10

  def initialize
  end

  # The index database, opened on first use.
  def db
    @db ||= LevelDB::DB.new File.expand_path("~/.cache/wp/enwiki-index")
  end

  # Titles starting with +title+, in index order.
  #
  # title  - the title prefix to look for ("" lists from the beginning).
  # amount - maximum number of titles to return.
  #
  # Returns an Array of title Strings (without the "title/" key prefix).
  def search(title, amount=40)
    prefix  = "title/#{title}"
    matches = []

    db.each(from: prefix).each do |key, _offset|
      # Keys are sorted, so the first key without the prefix ends the run
      # of matches.
      break if matches.size >= amount || !key.start_with?(prefix)
      matches << key.split("/", 2).last
    end

    matches
  end

  # Up to +amount+ pseudo-random titles.
  #
  # Each pick seeks to a random two-character prefix (capital letter or
  # digit followed by a lower-case letter or digit) inside the "title/"
  # keys and takes the first title at or after it. Picks that land beyond
  # the last title are dropped, so fewer than +amount+ may be returned.
  def random(amount=30)
    letters = [*'A'..'Z'] + [*'0'..'9']

    picks = amount.times.map do
      prefix  = "title/" + letters.sample + letters.sample.downcase
      key, _  = db.each(from: prefix).first
      key.split("/", 2).last if key && key.start_with?("title/")
    end

    picks.compact
  end

  # Look up an article by title, following redirects.
  #
  # title           - the article title (a leading "title/" is stripped).
  # redirected_from - title that redirected here (used internally).
  # hops_left       - how many more redirects may be followed.
  #
  # Returns the Article (with #redirected_from set), or nil when the title
  # is not in the index, is missing from its stream, or the redirect chain
  # is longer than MAX_REDIRECTS.
  def article(title, redirected_from=nil, hops_left=MAX_REDIRECTS)
    if title =~ %r{^title/(.+)}
      title = $1
    end

    return nil unless offset = db["title/#{title}"]

    offset = offset.to_i

    # The last stream of the dump has no "length/" entry (the importer
    # only learns a stream's length when the next one starts); nil makes
    # bz2_stream read up to the end of the file instead of zero bytes.
    stored_length = db["length/#{offset}"]
    length        = stored_length && stored_length.to_i

    found = bz2_stream(@@articles_url, offset, length) do |io|
      XMLReader.new(io.read).find { |candidate| candidate.title == title }
    end

    return nil unless found

    if found.redirect?
      return nil if hops_left <= 0
      article(found.redirect, title, hops_left - 1)
    else
      found.redirected_from = redirected_from
      found
    end
  end

  # Build the LevelDB index from a multistream index file, whose lines
  # have the form "<offset>:<n>:<title>".
  #
  # NOTE(review): the progress line calls commatize, which is defined in
  # lib/wp.rb and not required by this file -- it must be loaded first.
  def import_index(index_url=@@index_url)
    bz2_stream(index_url) do |io|

      last_offset = nil

      io.each_line.with_index do |line,i|
        line.chomp!

        offset, _n, title = line.split(":", 3)
        next unless title # skip blank or malformed lines

        db["title/#{title}"] = offset

        offset = offset.to_i

        last_offset = offset if last_offset.nil?

        # A new offset means the previous stream is complete, so its
        # compressed length is now known.
        if last_offset != offset
          db["length/#{last_offset}"] = (offset - last_offset).to_s
          last_offset = offset
        end

        # Occasional progress line, redrawn in place.
        print "\e[1G#{commatize i} - #{title}\e[J" if i % 11337 == 0

      end

    end
  end

end
|
166
|
+
|
167
|
+
#####################################################################
|
168
|
+
|
169
|
+
# WikiCloth parser customised through WikiCloth's class-level hooks.
class WikiParser < WikiCloth::Parser

  # Internal wiki links currently just pop up an alert naming the page.
  url_for { |page| "javascript:alert('You clicked on: #{page}');" }

  # Link attributes: only an href, built with the url_for hook.
  link_attributes_for { |page| { :href => url_for(page) } }

  # Only the "hello" template is known; every other template yields nil.
  template { |template| template == "hello" ? "Hello {{{1}}}" : nil }

  # External links open in a new window; the URL doubles as the label
  # when no link text was given.
  external_link do |url,text|
    label = text.blank? ? url : text
    "<a href=\"#{url}\" target=\"_blank\" class=\"exlink\">#{label}</a>"
  end

end
|
188
|
+
|
189
|
+
#####################################################################
|
metadata
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wp
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- epitron
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-07-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: slop
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: leveldb-ruby
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: snappy
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: sinatra
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: wikicloth
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: sinatra-contrib
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
description: Imports Wikipedia dumps to a local database and lets you browse them
|
98
|
+
from the commandline or with a local sinatra server.
|
99
|
+
email: chris@ill-logic.com
|
100
|
+
executables:
|
101
|
+
- wp
|
102
|
+
extensions: []
|
103
|
+
extra_rdoc_files:
|
104
|
+
- README.md
|
105
|
+
- LICENSE
|
106
|
+
files:
|
107
|
+
- bin/wp
|
108
|
+
- lib/wp.rb
|
109
|
+
- lib/wp/wiki.rb
|
110
|
+
- lib/wp/webserver.rb
|
111
|
+
- lib/wp/config.rb
|
112
|
+
- lib/wp/old/wiki.rb
|
113
|
+
- lib/wp/old/xmlreader.rb
|
114
|
+
- README.md
|
115
|
+
- LICENSE
|
116
|
+
homepage: http://github.com/epitron/wp/
|
117
|
+
licenses:
|
118
|
+
- WTFPL
|
119
|
+
metadata: {}
|
120
|
+
post_install_message:
|
121
|
+
rdoc_options: []
|
122
|
+
require_paths:
|
123
|
+
- lib
|
124
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - '>='
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0'
|
129
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
requirements: []
|
135
|
+
rubyforge_project:
|
136
|
+
rubygems_version: 2.0.3
|
137
|
+
signing_key:
|
138
|
+
specification_version: 4
|
139
|
+
summary: A local Wikipedia with commandline and web interfaces.
|
140
|
+
test_files: []
|
141
|
+
has_rdoc:
|