wp 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ab7e33f36703b9dad5ab221b6232041779eca3a5
4
+ data.tar.gz: b5475589d63447669a7319aba5923519d80a1a16
5
+ SHA512:
6
+ metadata.gz: 6f9614e78691b8c6a0d09ef2247d18e42bbb09e63855e49cdeb0803870071814dde4512bd0c9f21d0ba18b62eadfd74b4346f767e36dc54f92454c0f12c7b59a
7
+ data.tar.gz: fceac62168a01353bd64daa27055a24c9e5350c7266e853f5289bf643f3a8bedd7ca2302fbb7d94411604132b718eba9df4050c2dc053bcd703f6aeba3ba2e83
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Chris Gahan
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
1
+ Writeme!
data/bin/wp ADDED
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'cgi'
4
+
5
+ ###########################################################
6
+ # TODOs:
7
+ # [ ] Handle redirects (in Wiki#article)
8
+ # [ ] Render templates
9
+ # [ ] Remove "edit" links
10
+ # [ ] Don't break on pages with & in them
11
+ # [x] handle redirects
12
+ # [ ] store redirects in leveldb
13
+ # (update leveldb as the redirects are discovered)
14
+ # [ ] Random titles (lookup 30 random letter pairs)
15
+
16
+ ###########################################################
17
+
18
# Renders +html+ in the terminal by piping it into a `w3m` child process.
#
# html - the markup to display; it is written to the child with +puts+.
#
# The block form of IO.popen closes the pipe once the markup has been written.
def console_viewer(html)
  command = "w3m -T text/html" # alternative text browser: "lynx -stdin"
  IO.popen(command, "w") { |browser| browser.puts(html) }
end
24
+
25
+ ###########################################################
26
+
27
+ require 'slop' # lazy loaded
28
+
29
# Command-line definition for the `wp` executable, parsed with Slop.
# Flags ending in "=" take an argument.
opts = Slop.parse(help: true, strict: true) do
  banner 'Usage: wp [options] <query...>'

  on 's',  'server',   'Launch server'
  on 'p=', 'port',     'Webserver port (default: 4567)'
  on 'd',  'download', 'Download the latest dump'
  # Disabled for now:
  # on 'u', 'update', 'Update the database (takes a LONG time)'
  on 'i=', 'import',   'Import dump'
  on 'o=', 'outfile',  'Where to output the file'
  on 'c',  'config',   'Interactive config'

  # Idea (not enabled): a dedicated `server` sub-command with its own
  # `port` option that calls server(opts.port).
end
48
+
49
+ ###########################################################
50
+
51
+ require 'wp/wiki'
52
+
53
# Entry point: pick between webserver mode and the interactive CLI lookup.
if opts.server?
  # Webserver mode: requiring the file boots the Sinatra app.
  require 'wp/webserver'

elsif opts.reindex?
  # Not implemented yet.

elsif opts.update?
  # Not implemented yet.

else
  # CLI mode: search for titles matching the query, let the user choose one
  # when the match is ambiguous, then render the article in a text browser.
  query  = ARGV.join(" ")
  wiki   = Wiki.new
  titles = wiki.search(query, 20)

  # Nothing to choose from: say so instead of showing an empty menu.
  abort "No articles found for: #{query}" if titles.empty?

  if titles.size == 1
    title = titles.first
  else
    titles.each.with_index(1) do |candidate, number|
      puts "#{number}. #{candidate}"
    end

    title = nil
    until title
      print "> "
      line = STDIN.gets

      # EOF (Ctrl-D) or an empty line cancels the lookup.
      exit 1 if line.nil? || line.strip.empty?

      choice = line.strip
      next unless choice =~ /\A\d+\z/

      # Reject 0 explicitly: a negative index would silently wrap around
      # to the end of the list. Out-of-range numbers leave title nil and
      # the prompt is shown again.
      index = choice.to_i - 1
      title = titles[index] if index >= 0
    end
  end

  a = wiki.article(title)
  abort "Couldn't load article: #{title}" unless a

  # Titles are plain text (they may contain "&", "<", ...), so they are
  # escaped before being placed in markup. The article body is already HTML.
  heading = CGI.escapeHTML(a.title.to_s)

  html = []
  html << "<title>#{heading}</title>"
  html << "<center><h1>#{heading}</h1></center>"
  if a.redirected_from
    html << "<center>(redirected from: <b>#{CGI.escapeHTML(a.redirected_from.to_s)}</b>)</center>"
  end
  html << a.html

  console_viewer html.join("\n")

  # Alternative viewer idea (disabled): start the webserver in a thread and
  # open "http://localhost:4567/#{CGI.escape title}" in a graphical browser.
end
106
+
@@ -0,0 +1,28 @@
1
+
2
+ require 'wp/wiki'
3
+
4
+ #####################################################################
5
+
6
# Formats a number with thousands separators, e.g. 1234567 => "1,234,567".
#
# thing - anything whose +to_s+ is the text to group (typically an Integer).
#
# Returns a new String with a comma inserted every three characters, counted
# from the right. A leading minus sign is kept outside the grouping so that
# -123 renders as "-123" rather than "-,123".
#
# Uses only core String methods (no String#each_slice extension required).
def commatize(thing)
  text = thing.to_s
  sign, digits = text.start_with?("-") ? ["-", text[1..-1]] : ["", text]

  # Group from the right: reverse, cut into chunks of up to 3, reverse back.
  sign + digits.reverse.scan(/.{1,3}/m).join(",").reverse
end
9
+
10
+ #####################################################################
11
+
12
# Holder for the HTTP agent used to fetch dumps.
class Downloader

  # Returns the shared Mechanize agent, creating it on first use.
  # The library is required here rather than at load time, so it is only
  # loaded when something actually asks for the agent.
  def mech
    require 'mechanize'
    return @mech if @mech

    @mech = Mechanize.new
  end

end
20
+
21
+ #####################################################################
22
+
23
# Renders +html+ in the terminal by piping it into a `lynx` child process.
#
# html - the markup to display; it is written to the child with +puts+.
#
# The block form of IO.popen closes the pipe once the markup has been written.
def console_viewer(html)
  command = "lynx -stdin" # alternative text browser: "w3m -T text/html"
  IO.popen(command, "w") { |browser| browser.puts(html) }
end
@@ -0,0 +1,79 @@
1
+ require 'epitools'
2
+
3
require 'fileutils' # mkdir_p
require 'yaml'

module Pedia
  # User configuration: a small DSL (`option ...`) declares the known
  # settings, and an instance loads their values from a YAML file.
  class Config

    # Constants #########################################################

    CONFIG_DIR  = File.expand_path "~/.config/pedia"
    CACHE_DIR   = File.expand_path "~/.cache/pedia"
    CONFIG_FILE = File.join CONFIG_DIR, "config.yml"

    # DSL ###############################################################

    # One declared setting: its name, current value, default, the prompt
    # shown to the user, and a type symbol (e.g. :dir).
    class Option < TypedStruct["name value default prompt type:symbol"]

      # NOTE(review): this reads instance variables directly; whether
      # TypedStruct stores its fields there is not visible here -- confirm.
      def value
        @value ||= @default
      end

      # Prepares whatever the option refers to. For :dir options this
      # creates the directory (and any missing parents); other types
      # need no preparation.
      def init
        case type
        when :dir
          FileUtils.mkdir_p value
        end
      end

    end


    # Declares a setting: builds an Option from +args+, defines an accessor
    # named after it, and records it in the class-wide spec table.
    def self.option(*args)
      opt = Option.new(*args)

      attr_accessor opt.name

      @@spec ||= {}
      @@spec[opt.name] = opt
    end

    # Options ###########################################################

    option name: "db_path",
           prompt: "Where should the database be stored (approx. 8 gigs)?",
           default: CACHE_DIR,
           type: :dir

    option name: "xml_path",
           prompt: "Where should the XML be stored (approx. 8 gigs)?",
           default: CACHE_DIR,
           type: :dir


    # Methods ###########################################################

    # Loads the settings stored in +filename+.
    #
    # filename - config file name, resolved relative to CONFIG_DIR (the
    #            default resolves to CONFIG_FILE).
    #
    # A missing or empty file simply yields no options. Keys that were not
    # declared with `option` are ignored.
    def initialize(filename="config.yml")
      FileUtils.mkdir_p CONFIG_DIR

      path = File.expand_path(filename, CONFIG_DIR)

      # load_file opens and closes the file itself; an empty document
      # parses to a non-Hash value, which is treated as "no settings".
      yaml = File.exist?(path) ? YAML.load_file(path) : nil
      yaml = {} unless yaml.is_a?(Hash)

      @options = {}

      yaml.each do |key, val|
        if opt = @@spec[key]
          @options[key] = opt.with(value: val)
        end
      end
    end


    # Hash of option name => Option populated from the config file.
    def options
      @options
    end

  end # class Config
end # module Pedia
@@ -0,0 +1,33 @@
1
+ require 'leveldb'
2
+
3
+ # https://github.com/nricciar/wikicloth
4
+ require 'wikicloth'
5
+
6
# Key/value-backed article store: titles are keys, compressed wikitext is
# the value.
#
# NOTE(review): nothing visible in this class assigns @db, so every method
# below depends on a database handle being set up elsewhere -- confirm.
class Wiki

  # path - accepted but not used by the visible code.
  def initialize(path="enwiki")
  end

  # Returns the inflated (decompressed) value stored under +key+.
  def [](key)
    compressed = @db.get(key)
    Zlib.inflate(compressed)
  end

  # Returns up to +max+ titles, i.e. a search with an empty prefix.
  def titles(max=30)
    search("", max)
  end

  # Returns up to +max+ keys matching +prefix+, as reported by the
  # database's `fwmkeys` call.
  def search(prefix, max=30)
    @db.fwmkeys(prefix, max)
  end

  # Returns the article stored under +title+ rendered to HTML by WikiCloth.
  def html(title)
    wikitext = self[title]
    WikiCloth::Parser.new(data: wikitext).to_html
  end

end
28
+
29
+
30
# When this file is run directly (rather than required), print a sample of
# the stored titles.
if __FILE__ == $0
  pp Wiki.new.titles
end
@@ -0,0 +1,67 @@
1
+ require 'nokogiri'
2
+
3
+ # <title>AccessibleComputing</title>
4
+ # <ns>0</ns>
5
+ # <id>10</id>
6
+ # <redirect title="Computer accessibility" />
7
+ # <revision>
8
+ # <sha1>lo15ponaybcg2sf49sstw9gdjmdetnk</sha1>
9
+ # <format>text/x-wiki</format>
10
+ # <text>...ARTICLE...</text>
11
+ # </revision>
12
+
13
+
14
# Streams <page> records out of a MediaWiki XML dump using Nokogiri's pull
# parser, so the dump never has to be held in memory as a whole.
#
# Each record is yielded as a Hash that may contain "title", "text" and
# "redirect" entries.
class XMLReader

  include Enumerable

  # filename - path to the dump. Files with a ".bz2" extension are
  #            decompressed on the fly through `bunzip2 -c`.
  #
  # The underlying IO stays open for as long as the reader is in use.
  def initialize(filename)
    io = if File.extname(filename) == ".bz2"
      # Array form: no shell involved, so the file name needs no quoting.
      IO.popen ["bunzip2", "-c", filename]
    else
      # File.open (not Kernel#open) so a name starting with "|" can never
      # be run as a command.
      File.open(filename)
    end

    @reader = Nokogiri::XML::Reader io
  end

  # Yields one Hash per <page> element, at the moment its end tag is read.
  # Returns an Enumerator when called without a block.
  def each
    return to_enum(:each) unless block_given?

    info = {}

    @reader.each do |node|

      case node.name
      when "page"
        if node.open?
          info = {} # start collecting a new record
        else
          yield info
        end
      when "title", "text"
        info[node.name] = node.text if node.open?
      when "redirect"
        info["redirect"] = node.attribute("title")
      end

    end
  end

end
50
+
51
+
52
+
53
+
54
# Convenience predicates added to Nokogiri's pull-parser nodes.
class Nokogiri::XML::Reader

  # True when the reader is positioned on an element's start tag.
  def open?
    TYPE_ELEMENT == node_type
  end

  # True when the reader is positioned on an element's end tag.
  def closed?
    TYPE_END_ELEMENT == node_type
  end

  # The node's content, as returned by +inner_xml+.
  def text
    inner_xml
  end

end
67
+
@@ -0,0 +1,115 @@
1
require 'sinatra/base'
require 'wp/wiki'
require 'erb' # ERB::Util supplies the HTML / URL escaping used below

# The reloader is a development convenience only; run without it when it
# is not installed.
begin
  require "sinatra/reloader" # if development?
rescue LoadError
  # no live reloading available
end

$wiki = Wiki.new

# Small web front-end for browsing the local wiki:
#
#   GET /            - a sample of titles starting with a random letter
#   GET /search?q=   - titles matching the query
#   GET /<title>     - the rendered article
#
# The server is started as soon as the class body has been evaluated.
class Webserver < Sinatra::Base

  # The process-wide Wiki instance.
  def wiki
    $wiki
  end

  # Escapes +text+ for safe inclusion in HTML text or attribute values.
  def h(text)
    ERB::Util.html_escape(text.to_s)
  end

  # Builds a link to the article called +title+.
  #
  # title - raw article title.
  # desc  - link body as ready-made HTML; defaults to the escaped title.
  def link_to(title, desc=nil)
    desc ||= h(title)

    # Percent-encode each path segment so characters such as "?", "#" and
    # quotes cannot cut the URL short or escape the attribute, while "/"
    # inside titles keeps working with the catch-all route.
    path = title.to_s.split("/", -1).map { |segment| ERB::Util.url_encode(segment) }.join("/")

    "<a href=\"/#{path}\">#{desc}</a>"
  end

  # Returns +title+ as escaped HTML with every literal occurrence of
  # +query+ wrapped in a highlighting <span>. A nil or empty query
  # highlights nothing.
  def highlight(title, query)
    query = query.to_s
    return h(title) if query.empty?

    # Splitting on a capturing group keeps the matches in the result, so
    # every piece can be escaped individually.
    title.to_s.split(/(#{Regexp.escape(query)})/).map do |piece|
      if piece == query
        "<span style='background: yellow'>#{h piece}</span>"
      else
        h(piece)
      end
    end.join
  end

  # The search form shown at the top of every page, pre-filled with +query+.
  def header(query="")
    # Absolute action: a relative "search" would resolve against the
    # current article path on titles containing "/".
    %{
      <form action="/search">
        <input type="text" name="q" value="#{h query}">
        <input type="submit" value="search">
      </form>
    }
  end

  get "/" do
    letter = ('A'..'Z').to_a.sample
    titles = wiki.search(letter, 20)

    links = titles.map { |title| "<li>#{link_to title}</li>" }

    %{
      #{header}

      <h1>#{letter}</h1>
      <ul>
        #{links.join "\n"}
      </ul>
    }
  end

  get "/search" do
    query  = params[:q].to_s # a missing parameter behaves like an empty query
    titles = wiki.search query

    links = titles.map do |title|
      desc = highlight title, query
      "<li>#{link_to title, desc}</li>"
    end

    %{
      #{header query}

      <h1>Query: #{h query}</h1>
      <ul>
        #{links.join "\n"}
      </ul>
    }
  end

  get %r{/(.+)} do
    title = params[:captures].first

    if article = wiki.article(title)
      %{
        #{header article.title}

        <h1>#{h article.title}</h1>

        #{article.html}

        <!--
        #{article.xml.to_s}
        -->
      }
    else
      "No article found."
    end
  end

  run!

end
89
+
90
+
91
+
92
# WikiCloth parser customised through its class-level hooks: how internal
# links are addressed, how templates are expanded and how external links
# are rendered.
class WikiParser < WikiCloth::Parser

  # Address used for internal [[links]]: a placeholder that pops up the
  # page name.
  url_for do |page|
    # Backslash-escape backslashes and single quotes so a page name such as
    # "O'Reilly" cannot terminate the JavaScript string literal.
    js_safe = page.to_s.gsub(/[\\']/) { |char| "\\#{char}" }
    "javascript:alert('You clicked on: #{js_safe}');"
  end

  # Attributes put on the generated <a> tag of an internal link.
  link_attributes_for do |page|
    { :href => url_for(page) }
  end

  # Template source for {{name}}; only "hello" is defined, everything else
  # yields nil.
  template do |template|
    "Hello {{{1}}}" if template == "hello"
  end

  # External links open in a new window; the URL doubles as the link text
  # when none was given.
  external_link do |url,text|
    "<a href=\"#{url}\" target=\"_blank\" class=\"exlink\">#{text.blank? ? url : text}</a>"
  end

end
111
+
112
+ # @wiki = WikiParser.new({
113
+ # :params => { "PAGENAME" => "Testing123" },
114
+ # :data => "{{hello|world}} From {{ PAGENAME }} -- [www.google.com]"
115
+ # })
@@ -0,0 +1,189 @@
1
+ require 'leveldb'
2
+ require 'snappy'
3
+ require 'nokogiri'
4
+ require 'wikicloth'
5
+
6
+ #####################################################################
7
+
8
# Decompresses (part of) a bzip2 file through an external `bunzip2 -c`
# process and yields the decompressed stream.
#
# thing  - path of the .bz2 file.
# offset - byte position in the file to start feeding from (nil = start).
# length - number of compressed bytes to feed (nil = up to end of file).
#
# Yields an IO from which the decompressed data can be read.
# Returns the value of the block.
def bz2_stream(thing, offset=nil, length=nil)
  require 'open3'

  File.open(thing, "rb") do |bz2|

    Open3.popen2("bunzip2", "-c") do |inp, outp, th|
      # Feed the compressed bytes from a separate thread so that the caller
      # can read the output concurrently.
      feeder = Thread.new do
        begin
          IO.copy_stream(bz2, inp, length, offset)
        rescue Errno::EPIPE, IOError
          # The consumer stopped reading early; nothing left to feed.
        ensure
          inp.close unless inp.closed?
        end
      end

      begin
        yield outp
      ensure
        # The block may leave before all input was consumed; stop the
        # feeder instead of letting it write into pipes that are about to
        # be closed. Killing an already-finished thread is a no-op.
        feeder.kill
        feeder.join
      end
    end

  end

end
25
+
26
+ #####################################################################
27
+
28
# One <page> of the dump.
#
# title           - page title
# text            - wikitext (only set for non-redirect pages)
# redirect        - target title (only set for redirect pages)
# xml             - the page node the article was built from
# redirected_from - filled in by callers that followed a redirect
class Article < Struct.new(:title, :text, :redirect, :xml, :redirected_from)

  # page - a parsed <page> node answering `at(name)`.
  def initialize(page)
    self.xml   = page
    self.title = page.at("title").text

    target = page.at("redirect")

    if target
      self.redirect = target["title"]
    else
      self.text = page.at("text").text
    end
  end


  # True when this page merely points at another title.
  def redirect?
    redirect ? true : false
  end


  # Renders the wikitext to HTML with WikiParser.
  def to_html
    parser = WikiParser.new(data: text)
    parser.to_html
  end

  alias_method :html, :to_html

end
55
+
56
+ #####################################################################
57
+
58
# Parses a chunk of dump XML and enumerates the <page> elements in it as
# Article objects.
class XMLReader

  include Enumerable

  # input - XML text; parsed as a fragment, so it does not need a single
  #         root element.
  def initialize(input)
    @doc = Nokogiri::XML.fragment input
  end


  # Yields an Article for every <page> element found.
  # Returns an Enumerator when called without a block.
  def each
    return to_enum(:each) unless block_given?

    @doc.search("page").each do |page|
      yield Article.new(page)
    end
  end

end
74
+
75
+ #####################################################################
76
+
77
# Access to a local Wikipedia dump.
#
# A LevelDB index maps
#   "title/<title>"   => byte offset of the bzip2 stream holding the page
#   "length/<offset>" => compressed length of the stream starting there
# and articles are read by decompressing just that stream of the dump.
class Wiki
  #@@index_url = "http://dumps.wikimedia.org/enwiki/20130604/enwiki-20130604-pages-articles-multistream-index.txt.bz2"
  @@index_url = "/d/wiki/enwiki-20130604-pages-articles-multistream-index.txt.bz2"
  @@articles_url = "/d/wiki/enwiki-20130604-pages-articles-multistream.xml.bz2"

  # Upper bound on redirect hops followed by #article, so that a redirect
  # cycle cannot recurse forever.
  MAX_REDIRECTS = 10

  def initialize
  end


  # The index database, opened on first use.
  def db
    @db ||= LevelDB::DB.new File.expand_path("~/.cache/wp/enwiki-index")
  end


  # Returns up to +amount+ titles that start with +title+, in key order.
  def search(title, amount=40)
    prefix = "title/#{title}"

    # The iterator starts at the first key >= prefix and just keeps going,
    # so stop at the first key that no longer carries the prefix.
    db.each(from: prefix).take(amount).
      take_while { |key, _val| key.start_with?(prefix) }.
      map { |key, _val| key.split("/", 2).last }
  end

  # Returns up to +amount+ titles, each found by seeking to a random
  # two-character prefix (e.g. "Qa", "7b").
  def random(amount=30)
    letters = [*'A'..'Z'] + [*'0'..'9']

    picks = (1..amount).map do
      prefix = letters.sample + letters.sample.downcase

      # Seek inside the "title/" keyspace; a bare prefix would land on
      # unrelated keys.
      key, _val = db.each(from: "title/#{prefix}").first
      key && key.split("/", 2).last
    end

    picks.compact # seeks past the last key find nothing
  end

  # Loads the article called +title+, following redirects.
  #
  # title           - article title (a leading "title/" is tolerated).
  # redirected_from - title of the redirect page that led here, if any.
  # redirects_left  - remaining redirect hops before giving up.
  #
  # Returns an Article, or nil when the title is unknown, is missing from
  # its block, or the redirect chain is too long.
  def article(title, redirected_from=nil, redirects_left=MAX_REDIRECTS)
    if title =~ %r{^title/(.+)}
      title = $1
    end

    return nil unless offset = db["title/#{title}"]

    offset = offset.to_i

    # No length is recorded for the last block of the dump; nil makes the
    # copy run to the end of the file instead of copying zero bytes.
    length = db["length/#{offset}"]
    length = length.to_i if length

    found = bz2_stream(@@articles_url, offset, length) do |io|
      XMLReader.new(io.read).find { |candidate| candidate.title == title }
    end

    return nil unless found

    if found.redirect?
      return nil if redirects_left <= 0
      article(found.redirect, title, redirects_left - 1)
    else
      found.redirected_from = redirected_from
      found
    end
  end

  # Populates the index database from the dump's index file, whose lines
  # look like "<offset>:<page id>:<title>".
  #
  # index_url - path of the bzip2-compressed index file.
  def import_index(index_url=@@index_url)
    bz2_stream(index_url) do |io|

      last_offset = nil

      io.each_line.with_index do |line,i|
        line.chomp!

        offset, _page_id, title = line.split(":", 3)
        next unless title # skip blank or malformed lines

        db["title/#{title}"] = offset

        offset = offset.to_i

        last_offset = offset if last_offset.nil?

        # A new offset means the previous block is complete: record its size.
        if last_offset != offset
          length = offset - last_offset
          db["length/#{last_offset}"] = length.to_s

          last_offset = offset
        end

        if i % 11337 == 0
          # commatize lives outside this class; fall back to the plain
          # number when it has not been loaded.
          count = respond_to?(:commatize, true) ? commatize(i) : i
          print "\e[1G#{count} - #{title}\e[J"
        end

      end

    end
  end


end
166
+
167
+ #####################################################################
168
+
169
# WikiCloth parser customised through its class-level hooks: how internal
# links are addressed, how templates are expanded and how external links
# are rendered.
class WikiParser < WikiCloth::Parser

  # Address used for internal [[links]]: a placeholder that pops up the
  # page name.
  url_for do |page|
    # Backslash-escape backslashes and single quotes so a page name such as
    # "O'Reilly" cannot terminate the JavaScript string literal.
    js_safe = page.to_s.gsub(/[\\']/) { |char| "\\#{char}" }
    "javascript:alert('You clicked on: #{js_safe}');"
  end

  # Attributes put on the generated <a> tag of an internal link.
  link_attributes_for do |page|
    { :href => url_for(page) }
  end

  # Template source for {{name}}; only "hello" is defined, everything else
  # yields nil.
  template do |template|
    "Hello {{{1}}}" if template == "hello"
  end

  # External links open in a new window; the URL doubles as the link text
  # when none was given.
  external_link do |url,text|
    "<a href=\"#{url}\" target=\"_blank\" class=\"exlink\">#{text.blank? ? url : text}</a>"
  end

end
188
+
189
+ #####################################################################
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wp
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - epitron
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-07-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: slop
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: leveldb-ruby
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: snappy
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: sinatra
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: wikicloth
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: sinatra-contrib
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Imports Wikipedia dumps to a local database and lets you browse them
98
+ from the commandline or with a local sinatra server.
99
+ email: chris@ill-logic.com
100
+ executables:
101
+ - wp
102
+ extensions: []
103
+ extra_rdoc_files:
104
+ - README.md
105
+ - LICENSE
106
+ files:
107
+ - bin/wp
108
+ - lib/wp.rb
109
+ - lib/wp/wiki.rb
110
+ - lib/wp/webserver.rb
111
+ - lib/wp/config.rb
112
+ - lib/wp/old/wiki.rb
113
+ - lib/wp/old/xmlreader.rb
114
+ - README.md
115
+ - LICENSE
116
+ homepage: http://github.com/epitron/wp/
117
+ licenses:
118
+ - WTFPL
119
+ metadata: {}
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - '>='
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 2.0.3
137
+ signing_key:
138
+ specification_version: 4
139
+ summary: A local Wikipedia with commandline and web interfaces.
140
+ test_files: []
141
+ has_rdoc: