wikiscript 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/{HISTORY.md → CHANGELOG.md} +0 -0
- data/LICENSE.md +116 -0
- data/Manifest.txt +18 -11
- data/NOTES.md +6 -0
- data/README.md +12 -14
- data/Rakefile +4 -5
- data/lib/wikiscript/page.rb +12 -9
- data/lib/wikiscript/page_reader.rb +65 -0
- data/lib/wikiscript/table_reader.rb +109 -0
- data/lib/wikiscript/version.rb +9 -2
- data/lib/wikiscript.rb +56 -9
- data/test/helper.rb +0 -5
- data/test/test_link.rb +31 -0
- data/test/test_page.rb +8 -9
- data/test/test_page_de.rb +5 -6
- data/test/test_page_reader.rb +80 -0
- data/test/test_table_reader.rb +81 -0
- metadata +51 -35
- data/.gemtest +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e86e3e1b50b44067a5815a13155b04aa441e8023
|
4
|
+
data.tar.gz: c38ed306346a9b3d2c7c365fba00c41bee4624b4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 290c18216f59d7c2e6f2e1b181651413790fd2f7ff709c7f6f9c9cf1708b540480efa8827c57613df6862b1da755cd584c53f71ff93b07fe34a7a2ef4a67420d
|
7
|
+
data.tar.gz: 9e0c5c402b17263d38065eaac1a9e1febee6f3a73dff382aae8d81a8d0436881a2437a784dcee83f69c50d1fb5c6c5f385458a90ebb2cff8a31d31fffbd951a5
|
data/{HISTORY.md → CHANGELOG.md}
RENAMED
File without changes
|
data/LICENSE.md
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
CC0 1.0 Universal
|
2
|
+
|
3
|
+
Statement of Purpose
|
4
|
+
|
5
|
+
The laws of most jurisdictions throughout the world automatically confer
|
6
|
+
exclusive Copyright and Related Rights (defined below) upon the creator and
|
7
|
+
subsequent owner(s) (each and all, an "owner") of an original work of
|
8
|
+
authorship and/or a database (each, a "Work").
|
9
|
+
|
10
|
+
Certain owners wish to permanently relinquish those rights to a Work for the
|
11
|
+
purpose of contributing to a commons of creative, cultural and scientific
|
12
|
+
works ("Commons") that the public can reliably and without fear of later
|
13
|
+
claims of infringement build upon, modify, incorporate in other works, reuse
|
14
|
+
and redistribute as freely as possible in any form whatsoever and for any
|
15
|
+
purposes, including without limitation commercial purposes. These owners may
|
16
|
+
contribute to the Commons to promote the ideal of a free culture and the
|
17
|
+
further production of creative, cultural and scientific works, or to gain
|
18
|
+
reputation or greater distribution for their Work in part through the use and
|
19
|
+
efforts of others.
|
20
|
+
|
21
|
+
For these and/or other purposes and motivations, and without any expectation
|
22
|
+
of additional consideration or compensation, the person associating CC0 with a
|
23
|
+
Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
|
24
|
+
and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
|
25
|
+
and publicly distribute the Work under its terms, with knowledge of his or her
|
26
|
+
Copyright and Related Rights in the Work and the meaning and intended legal
|
27
|
+
effect of CC0 on those rights.
|
28
|
+
|
29
|
+
1. Copyright and Related Rights. A Work made available under CC0 may be
|
30
|
+
protected by copyright and related or neighboring rights ("Copyright and
|
31
|
+
Related Rights"). Copyright and Related Rights include, but are not limited
|
32
|
+
to, the following:
|
33
|
+
|
34
|
+
i. the right to reproduce, adapt, distribute, perform, display, communicate,
|
35
|
+
and translate a Work;
|
36
|
+
|
37
|
+
ii. moral rights retained by the original author(s) and/or performer(s);
|
38
|
+
|
39
|
+
iii. publicity and privacy rights pertaining to a person's image or likeness
|
40
|
+
depicted in a Work;
|
41
|
+
|
42
|
+
iv. rights protecting against unfair competition in regards to a Work,
|
43
|
+
subject to the limitations in paragraph 4(a), below;
|
44
|
+
|
45
|
+
v. rights protecting the extraction, dissemination, use and reuse of data in
|
46
|
+
a Work;
|
47
|
+
|
48
|
+
vi. database rights (such as those arising under Directive 96/9/EC of the
|
49
|
+
European Parliament and of the Council of 11 March 1996 on the legal
|
50
|
+
protection of databases, and under any national implementation thereof,
|
51
|
+
including any amended or successor version of such directive); and
|
52
|
+
|
53
|
+
vii. other similar, equivalent or corresponding rights throughout the world
|
54
|
+
based on applicable law or treaty, and any national implementations thereof.
|
55
|
+
|
56
|
+
2. Waiver. To the greatest extent permitted by, but not in contravention of,
|
57
|
+
applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
|
58
|
+
unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
|
59
|
+
and Related Rights and associated claims and causes of action, whether now
|
60
|
+
known or unknown (including existing as well as future claims and causes of
|
61
|
+
action), in the Work (i) in all territories worldwide, (ii) for the maximum
|
62
|
+
duration provided by applicable law or treaty (including future time
|
63
|
+
extensions), (iii) in any current or future medium and for any number of
|
64
|
+
copies, and (iv) for any purpose whatsoever, including without limitation
|
65
|
+
commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
|
66
|
+
the Waiver for the benefit of each member of the public at large and to the
|
67
|
+
detriment of Affirmer's heirs and successors, fully intending that such Waiver
|
68
|
+
shall not be subject to revocation, rescission, cancellation, termination, or
|
69
|
+
any other legal or equitable action to disrupt the quiet enjoyment of the Work
|
70
|
+
by the public as contemplated by Affirmer's express Statement of Purpose.
|
71
|
+
|
72
|
+
3. Public License Fallback. Should any part of the Waiver for any reason be
|
73
|
+
judged legally invalid or ineffective under applicable law, then the Waiver
|
74
|
+
shall be preserved to the maximum extent permitted taking into account
|
75
|
+
Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
|
76
|
+
is so judged Affirmer hereby grants to each affected person a royalty-free,
|
77
|
+
non transferable, non sublicensable, non exclusive, irrevocable and
|
78
|
+
unconditional license to exercise Affirmer's Copyright and Related Rights in
|
79
|
+
the Work (i) in all territories worldwide, (ii) for the maximum duration
|
80
|
+
provided by applicable law or treaty (including future time extensions), (iii)
|
81
|
+
in any current or future medium and for any number of copies, and (iv) for any
|
82
|
+
purpose whatsoever, including without limitation commercial, advertising or
|
83
|
+
promotional purposes (the "License"). The License shall be deemed effective as
|
84
|
+
of the date CC0 was applied by Affirmer to the Work. Should any part of the
|
85
|
+
License for any reason be judged legally invalid or ineffective under
|
86
|
+
applicable law, such partial invalidity or ineffectiveness shall not
|
87
|
+
invalidate the remainder of the License, and in such case Affirmer hereby
|
88
|
+
affirms that he or she will not (i) exercise any of his or her remaining
|
89
|
+
Copyright and Related Rights in the Work or (ii) assert any associated claims
|
90
|
+
and causes of action with respect to the Work, in either case contrary to
|
91
|
+
Affirmer's express Statement of Purpose.
|
92
|
+
|
93
|
+
4. Limitations and Disclaimers.
|
94
|
+
|
95
|
+
a. No trademark or patent rights held by Affirmer are waived, abandoned,
|
96
|
+
surrendered, licensed or otherwise affected by this document.
|
97
|
+
|
98
|
+
b. Affirmer offers the Work as-is and makes no representations or warranties
|
99
|
+
of any kind concerning the Work, express, implied, statutory or otherwise,
|
100
|
+
including without limitation warranties of title, merchantability, fitness
|
101
|
+
for a particular purpose, non infringement, or the absence of latent or
|
102
|
+
other defects, accuracy, or the present or absence of errors, whether or not
|
103
|
+
discoverable, all to the greatest extent permissible under applicable law.
|
104
|
+
|
105
|
+
c. Affirmer disclaims responsibility for clearing rights of other persons
|
106
|
+
that may apply to the Work or any use thereof, including without limitation
|
107
|
+
any person's Copyright and Related Rights in the Work. Further, Affirmer
|
108
|
+
disclaims responsibility for obtaining any necessary consents, permissions
|
109
|
+
or other rights required for any use of the Work.
|
110
|
+
|
111
|
+
d. Affirmer understands and acknowledges that Creative Commons is not a
|
112
|
+
party to this document and has no duty or obligation with respect to this
|
113
|
+
CC0 or use of the Work.
|
114
|
+
|
115
|
+
For more information, please see
|
116
|
+
<http://creativecommons.org/publicdomain/zero/1.0/>
|
data/Manifest.txt
CHANGED
@@ -1,11 +1,18 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
lib/wikiscript
|
8
|
-
lib/wikiscript/
|
9
|
-
|
10
|
-
|
11
|
-
|
1
|
+
CHANGELOG.md
|
2
|
+
LICENSE.md
|
3
|
+
Manifest.txt
|
4
|
+
NOTES.md
|
5
|
+
README.md
|
6
|
+
Rakefile
|
7
|
+
lib/wikiscript.rb
|
8
|
+
lib/wikiscript/client.rb
|
9
|
+
lib/wikiscript/page.rb
|
10
|
+
lib/wikiscript/page_reader.rb
|
11
|
+
lib/wikiscript/table_reader.rb
|
12
|
+
lib/wikiscript/version.rb
|
13
|
+
test/helper.rb
|
14
|
+
test/test_link.rb
|
15
|
+
test/test_page.rb
|
16
|
+
test/test_page_de.rb
|
17
|
+
test/test_page_reader.rb
|
18
|
+
test/test_table_reader.rb
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# wikiscript - scripts for wikipedia (get wikitext for page etc.)
|
2
2
|
|
3
|
-
* home :: [github.com/wikiscript/wikiscript
|
4
|
-
* bugs :: [github.com/wikiscript/wikiscript
|
3
|
+
* home :: [github.com/wikiscript/wikiscript](https://github.com/wikiscript/wikiscript)
|
4
|
+
* bugs :: [github.com/wikiscript/wikiscript/issues](https://github.com/wikiscript/wikiscript/issues)
|
5
5
|
* gem :: [rubygems.org/gems/wikiscript](https://rubygems.org/gems/wikiscript)
|
6
6
|
* rdoc :: [rubydoc.info/gems/wikiscript](http://rubydoc.info/gems/wikiscript)
|
7
7
|
|
@@ -12,13 +12,16 @@ Read-only access to wikipedia pages.
|
|
12
12
|
Example - Get wikitext source (via `en.wikipedia.org/w/index.php?action=raw&title=<title>`):
|
13
13
|
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
15
|
+
```
|
16
|
+
>> page = Wikiscript::Page.new( '2014_FIFA_World_Cup_squads' )
|
17
|
+
>> page.text
|
18
|
+
|
19
|
+
The [[2014 FIFA World Cup]] is an international [[association football|football]]
|
20
|
+
tournament which is currently being held in Brazil from 12 June to 13 July 2014.
|
21
|
+
The 32 national teams involved in the tournament were required to register
|
22
|
+
a squad of 23 players, including three goalkeepers...
|
23
|
+
```
|
24
|
+
|
22
25
|
|
23
26
|
|
24
27
|
## Install
|
@@ -28,11 +31,6 @@ Just install the gem:
|
|
28
31
|
$ gem install wikiscript
|
29
32
|
|
30
33
|
|
31
|
-
## Alternatives
|
32
|
-
|
33
|
-
TBD
|
34
|
-
|
35
|
-
|
36
34
|
## License
|
37
35
|
|
38
36
|
The `wikiscript` scripts are dedicated to the public domain.
|
data/Rakefile
CHANGED
@@ -8,14 +8,14 @@ Hoe.spec 'wikiscript' do
|
|
8
8
|
self.summary = 'wikiscript - scripts for wikipedia (get wikitext for page etc.)'
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls = ['https://github.com/wikiscript/wikiscript
|
11
|
+
self.urls = ['https://github.com/wikiscript/wikiscript']
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
14
|
self.email = 'opensport@googlegroups.com'
|
15
15
|
|
16
16
|
# switch extension to .markdown for github formatting
|
17
|
-
self.readme_file
|
18
|
-
self.history_file = '
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'CHANGELOG.md'
|
19
19
|
|
20
20
|
self.extra_deps = [
|
21
21
|
['logutils' ],
|
@@ -25,7 +25,6 @@ Hoe.spec 'wikiscript' do
|
|
25
25
|
self.licenses = ['Public Domain']
|
26
26
|
|
27
27
|
self.spec_extras = {
|
28
|
-
|
28
|
+
required_ruby_version: '>= 2.2.2'
|
29
29
|
}
|
30
|
-
|
31
30
|
end
|
data/lib/wikiscript/page.rb
CHANGED
@@ -6,24 +6,27 @@ module Wikiscript
|
|
6
6
|
|
7
7
|
include LogUtils::Logging
|
8
8
|
|
9
|
-
attr_reader :title
|
9
|
+
attr_reader :title
|
10
10
|
|
11
|
-
def initialize( title )
|
11
|
+
def initialize( title, text: nil )
|
12
12
|
## todo: check title
|
13
13
|
## replace title spaces w/ _ ????
|
14
14
|
## to allow "pretty" titles - why? why not??
|
15
15
|
|
16
16
|
@title = title
|
17
|
-
@text =
|
17
|
+
@text = text
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def text
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
@
|
21
|
+
@text ||= download_text # cache text (from request)
|
22
|
+
end
|
23
|
+
|
24
|
+
def download_text
|
25
|
+
Client.new.text( @title )
|
26
26
|
end
|
27
27
|
|
28
|
+
def parse ## todo/change: use/find a different name e.g. doc/elements/etc. - why? why not?
|
29
|
+
PageReader.parse( text )
|
30
|
+
end
|
28
31
|
end # class Page
|
29
32
|
end # Wikiscript
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Wikiscript
|
4
|
+
|
5
|
+
class PageReader
|
6
|
+
|
7
|
+
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
8
|
+
txt = File.open( path, 'r:utf-8' ).read
|
9
|
+
parse( txt )
|
10
|
+
end
|
11
|
+
|
12
|
+
|
13
|
+
def self.parse( txt )
|
14
|
+
page = [] ## page structure
|
15
|
+
|
16
|
+
inside_table = false
|
17
|
+
table_txt = nil
|
18
|
+
|
19
|
+
txt.each_line do |line|
|
20
|
+
line = line.strip
|
21
|
+
|
22
|
+
break if line == '__END__'
|
23
|
+
|
24
|
+
## note: allow/add comments
|
25
|
+
## note: CANNOT allow inline (end-of-line) comments
|
26
|
+
## would strip/break css colors eg. bgcolor=#ffff44
|
27
|
+
next if line.start_with?( '#' ) ## skip comments too
|
28
|
+
next if line.empty? ## skip empty lines for now
|
29
|
+
|
30
|
+
## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
|
31
|
+
## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
|
32
|
+
if line =~ /^(={1,}) ## leading ======
|
33
|
+
([^=]+?) ## text (note: for now no "inline" = allowed)
|
34
|
+
=* ## (optional) trailing ====
|
35
|
+
$/x
|
36
|
+
heading_marker = $1
|
37
|
+
heading_level = $1.length ## count number of = for heading level
|
38
|
+
heading = $2.strip
|
39
|
+
|
40
|
+
puts "heading #{heading_level} >#{heading}<"
|
41
|
+
page << [:"h#{heading_level}", heading]
|
42
|
+
elsif line.start_with?( '{|' ) ## start table
|
43
|
+
inside_table = true
|
44
|
+
table_txt = String.new ## collect table source text
|
45
|
+
table_txt << line << "\n" ## note: do NOT forget to add back newline!!
|
46
|
+
elsif inside_table && line.start_with?( '|}' ) ## end table
|
47
|
+
table_txt << line << "\n"
|
48
|
+
table = TableReader.parse_table( table_txt )
|
49
|
+
page << [:table, table]
|
50
|
+
## reset table variables
|
51
|
+
inside_table = false
|
52
|
+
table_txt = nil
|
53
|
+
elsif inside_table
|
54
|
+
table_txt << line << "\n"
|
55
|
+
else
|
56
|
+
puts "** !!! ERROR !!! unknown line type in wiki page:"
|
57
|
+
pp line
|
58
|
+
exit 1
|
59
|
+
end
|
60
|
+
end
|
61
|
+
page
|
62
|
+
end # method parse
|
63
|
+
end # class PageReader
|
64
|
+
|
65
|
+
end # module Wikiscript
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Wikiscript
|
4
|
+
|
5
|
+
class TableReader
|
6
|
+
|
7
|
+
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
8
|
+
txt = File.open( path, 'r:utf-8' ).read
|
9
|
+
parse( txt )
|
10
|
+
end
|
11
|
+
|
12
|
+
|
13
|
+
def self.parse_table( txt ) ## only allow single table
|
14
|
+
tables = parse( txt )
|
15
|
+
|
16
|
+
if tables.size == 0
|
17
|
+
puts "** !!! ERROR !!! no table found in text"
|
18
|
+
exit 1
|
19
|
+
elsif tables.size > 1
|
20
|
+
puts "** !!! ERROR !!! too many tables (#{tables.size}) found in text; only one expected/allowed; sorry"
|
21
|
+
exit 1
|
22
|
+
else
|
23
|
+
tables[0] ## pass-along first table; everything ok
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def self.parse( txt )
|
28
|
+
tables = [] ## todo/check: allow multiple tables? why? why not?
|
29
|
+
|
30
|
+
rows = nil ## note: assume first row is the headers row!!
|
31
|
+
row = nil ## current row ## note: same as rows[-1]
|
32
|
+
|
33
|
+
inside_table = false
|
34
|
+
|
35
|
+
txt.each_line do |line|
|
36
|
+
line = line.strip
|
37
|
+
|
38
|
+
break if line == '__END__'
|
39
|
+
|
40
|
+
## note: allow/add comments
|
41
|
+
## note: CANNOT allow inline (end-of-line) comments
|
42
|
+
## would strip/break css colors eg. bgcolor=#ffff44
|
43
|
+
next if line.start_with?( '#' ) ## skip comments too
|
44
|
+
next if line.empty? ## skip empty lines for now
|
45
|
+
|
46
|
+
|
47
|
+
## note: for the table format
|
48
|
+
## see https://en.wikipedia.org/wiki/Help:Basic_table_markup
|
49
|
+
|
50
|
+
if line.start_with?( '{|' ) ## start table
|
51
|
+
inside_table = true
|
52
|
+
rows = []
|
53
|
+
elsif inside_table && line.start_with?( '|}' ) ## end table
|
54
|
+
tables << rows
|
55
|
+
rows = nil
|
56
|
+
row = nil
|
57
|
+
inside_table = false
|
58
|
+
elsif inside_table && line.start_with?( '|-' ) ## row divider
|
59
|
+
row = []
|
60
|
+
rows << row
|
61
|
+
elsif inside_table && line.start_with?( '!' ) ## header column
|
62
|
+
values = line.sub( '!', '' ).strip.split( '!!' )
|
63
|
+
## note: |- row divider is optional before header columns
|
64
|
+
if rows.empty?
|
65
|
+
row = []
|
66
|
+
rows << row
|
67
|
+
end
|
68
|
+
## add each value one-by-one for now (to keep (same) row reference)
|
69
|
+
## note: also strip leading (optional) attributes
|
70
|
+
values.each do |value|
|
71
|
+
row << strip_emphases( strip_attributes( value.strip ))
|
72
|
+
end
|
73
|
+
elsif inside_table && line.start_with?( '|' ) ## table data
|
74
|
+
values = line.sub( '|', '' ).strip.split( '||' )
|
75
|
+
## add each value one-by-one for now (to keep (same) row reference)
|
76
|
+
values.each do |value|
|
77
|
+
row << strip_emphases( strip_attributes( value.strip ))
|
78
|
+
end
|
79
|
+
elsif inside_table
|
80
|
+
puts "!! ERROR !! unknown line type inside table:"
|
81
|
+
puts line
|
82
|
+
exit 1
|
83
|
+
else
|
84
|
+
puts "!! ERROR !! unknown line type outside (before or after) table:"
|
85
|
+
puts line
|
86
|
+
exit 1
|
87
|
+
end
|
88
|
+
end
|
89
|
+
tables
|
90
|
+
end # method parse
|
91
|
+
|
92
|
+
####
|
93
|
+
# helper
|
94
|
+
def self.strip_attributes( value )
|
95
|
+
if value =~ /^[a-z]+=/ ## if starts with 'attribute='
|
96
|
+
value = value.sub( /[^|]+\|[ ]*/ , '' ) ## strip everything incl. pipe (|) and trailing spaces
|
97
|
+
else
|
98
|
+
value ## return as-is (pass-through)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
def self.strip_emphases( value ) ## strip bold or emphasis; note: emphases plural of emphasis
|
103
|
+
value = value.gsub( /'{2,}/, '' ).strip ## remove two or more quotes e.g. '' or ''' etc.
|
104
|
+
value
|
105
|
+
end
|
106
|
+
|
107
|
+
end # class TableReader
|
108
|
+
|
109
|
+
end # module Wikiscript
|
data/lib/wikiscript/version.rb
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
|
2
2
|
module Wikiscript
|
3
|
-
VERSION = '0.
|
4
|
-
|
3
|
+
VERSION = '0.2.0'
|
4
|
+
|
5
|
+
def self.banner
|
6
|
+
"wikiscript/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
7
|
+
end
|
5
8
|
|
9
|
+
def self.root
|
10
|
+
"#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
|
11
|
+
end
|
12
|
+
end
|
data/lib/wikiscript.rb
CHANGED
@@ -18,19 +18,13 @@ require 'fetcher'
|
|
18
18
|
|
19
19
|
require 'wikiscript/version' # let it always go first
|
20
20
|
require 'wikiscript/client'
|
21
|
+
require 'wikiscript/table_reader'
|
22
|
+
require 'wikiscript/page_reader'
|
21
23
|
require 'wikiscript/page'
|
22
24
|
|
23
25
|
|
24
|
-
module Wikiscript
|
25
|
-
|
26
|
-
def self.banner
|
27
|
-
"wikiscript/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.root
|
31
|
-
"#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}"
|
32
|
-
end
|
33
26
|
|
27
|
+
module Wikiscript
|
34
28
|
|
35
29
|
## for now make lang a global - change why? why not??
|
36
30
|
def self.lang=(value)
|
@@ -42,8 +36,61 @@ module Wikiscript
|
|
42
36
|
@@lang ||= 'en'
|
43
37
|
end
|
44
38
|
|
39
|
+
##
|
40
|
+
## todo: fix? - strip spaces from link and title
|
41
|
+
## spaces possible? strip in ruby later e.g. use strip - why? why not?
|
42
|
+
## todo/change: find a better name - rename LINK_PATTERN to LINK_REGEX - why? why not?
|
43
|
+
LINK_PATTERN = %r{
|
44
|
+
\[\[
|
45
|
+
(?<link>[^|\]]+) # everything but pipe (|) or bracket (])
|
46
|
+
(?:
|
47
|
+
\|
|
48
|
+
(?<title>[^\]]+)
|
49
|
+
)? # optional wiki link title
|
50
|
+
\]\]
|
51
|
+
}x
|
52
|
+
|
53
|
+
|
54
|
+
def self.unlink( value )
|
55
|
+
## replace ALL wiki links with title (or link)
|
56
|
+
## e.g. [[Santiago]] ([[La Florida, Chile|La Florida]])
|
57
|
+
## => Santiago (La Florida)
|
58
|
+
value = value.gsub( LINK_PATTERN ) do |_|
|
59
|
+
link = $~[:link]
|
60
|
+
title = $~[:title]
|
61
|
+
|
62
|
+
if title
|
63
|
+
title
|
64
|
+
else
|
65
|
+
link
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
value.strip
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
def self.parse_link( value ) ## todo/change: find a better name - use match_link/etc. - why? why not?
|
74
|
+
## find first matching link
|
75
|
+
## return [nil,nil] if nothing found
|
76
|
+
if (m = LINK_PATTERN.match( value ))
|
77
|
+
link = m[:link]
|
78
|
+
title = m[:title]
|
79
|
+
|
80
|
+
link = link.strip ## remove leading and trailing spaces
|
81
|
+
title = title.strip if title
|
82
|
+
[link,title]
|
83
|
+
else
|
84
|
+
[nil,nil]
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
45
88
|
end # module Wikiscript
|
46
89
|
|
47
90
|
|
48
91
|
|
92
|
+
## add camelcase alias
|
93
|
+
WikiScript = Wikiscript
|
94
|
+
|
95
|
+
|
49
96
|
puts Wikiscript.banner
|
data/test/helper.rb
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
## $:.unshift(File.dirname(__FILE__))
|
2
2
|
|
3
3
|
## minitest setup
|
4
|
-
|
5
|
-
# require 'minitest/unit'
|
6
4
|
require 'minitest/autorun'
|
7
5
|
|
8
|
-
# include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
|
9
6
|
|
10
7
|
## our own code
|
11
|
-
|
12
8
|
require 'wikiscript'
|
13
|
-
|
data/test/test_link.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_link.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestLink < MiniTest::Test
|
12
|
+
|
13
|
+
def test_unlink
|
14
|
+
assert_equal 'Santiago (La Florida)', Wikiscript.unlink( '[[Santiago]] ([[La Florida, Chile|La Florida]])' )
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_parse_link
|
18
|
+
link, title = Wikiscript.parse_link( '[[La Florida, Chile|La Florida]]' )
|
19
|
+
assert_equal 'La Florida, Chile', link
|
20
|
+
assert_equal 'La Florida', title
|
21
|
+
|
22
|
+
link, title = Wikiscript.parse_link( '[[ La Florida, Chile | La Florida ]]' )
|
23
|
+
assert_equal 'La Florida, Chile', link
|
24
|
+
assert_equal 'La Florida', title
|
25
|
+
|
26
|
+
link, title = Wikiscript.parse_link( 'La Florida' )
|
27
|
+
assert link == nil
|
28
|
+
assert title == nil
|
29
|
+
end
|
30
|
+
|
31
|
+
end # class TestLink
|
data/test/test_page.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
require 'helper'
|
5
5
|
|
6
6
|
|
7
|
-
class TestPage < MiniTest::
|
7
|
+
class TestPage < MiniTest::Test
|
8
8
|
|
9
9
|
def setup
|
10
10
|
Wikiscript.lang = :en
|
@@ -18,10 +18,10 @@ class TestPage < MiniTest::Unit::TestCase
|
|
18
18
|
pp text[0..600]
|
19
19
|
|
20
20
|
## check for some snippets
|
21
|
-
assert
|
22
|
-
assert
|
23
|
-
assert
|
24
|
-
assert
|
21
|
+
assert /{{Infobox country/ =~ text
|
22
|
+
assert /common_name = Austria/ =~ text
|
23
|
+
assert /capital = \[\[Vienna\]\]/ =~ text
|
24
|
+
assert /The origins of modern-day Austria date back to the time/ =~ text
|
25
25
|
end
|
26
26
|
|
27
27
|
def test_sankt_poelten_en
|
@@ -32,10 +32,9 @@ class TestPage < MiniTest::Unit::TestCase
|
|
32
32
|
pp text[0..600]
|
33
33
|
|
34
34
|
## check for some snippets
|
35
|
-
assert
|
36
|
-
assert
|
37
|
-
assert
|
35
|
+
assert /{{Infobox Town AT/ =~ text
|
36
|
+
assert /Name\s+=\s+Sankt Pölten/ =~ text
|
37
|
+
assert /'''Sankt Pölten''' \(''St. Pölten''\) is the capital city of/ =~ text
|
38
38
|
end
|
39
39
|
|
40
40
|
end # class TestPage
|
41
|
-
|
data/test/test_page_de.rb
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
require 'helper'
|
5
5
|
|
6
6
|
|
7
|
-
class TestPageDe < MiniTest::
|
7
|
+
class TestPageDe < MiniTest::Test
|
8
8
|
|
9
9
|
def setup
|
10
10
|
Wikiscript.lang = :de
|
@@ -18,11 +18,10 @@ class TestPageDe < MiniTest::Unit::TestCase
|
|
18
18
|
pp text[0..600]
|
19
19
|
|
20
20
|
## check for some snippets
|
21
|
-
assert
|
22
|
-
assert
|
23
|
-
assert
|
24
|
-
assert
|
21
|
+
assert /{{Infobox Gemeinde in Österreich/ =~ text
|
22
|
+
assert /Name\s+=\s+St\. Pölten/ =~ text
|
23
|
+
assert /'''St\. Pölten''' \(amtlicher Name,/ =~ text
|
24
|
+
## assert /Die Stadt liegt am Fluss \[\[Traisen \(Fluss\)\|Traisen\]\]/ =~ text
|
25
25
|
end
|
26
26
|
|
27
27
|
end # class TestPageDe
|
28
|
-
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_page_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestPageReader < MiniTest::Test
|
12
|
+
|
13
|
+
def test_basic
|
14
|
+
el = Wikiscript::PageReader.parse( <<TXT )
|
15
|
+
=Heading 1==
|
16
|
+
==Heading 2==
|
17
|
+
===Heading 3===
|
18
|
+
|
19
|
+
{|
|
20
|
+
|-
|
21
|
+
! header1
|
22
|
+
! header2
|
23
|
+
! header3
|
24
|
+
|-
|
25
|
+
| row1cell1
|
26
|
+
| row1cell2
|
27
|
+
| row1cell3
|
28
|
+
|-
|
29
|
+
| row2cell1
|
30
|
+
| row2cell2
|
31
|
+
| row2cell3
|
32
|
+
|}
|
33
|
+
TXT
|
34
|
+
|
35
|
+
pp el
|
36
|
+
|
37
|
+
assert_equal 4, el.size
|
38
|
+
assert_equal [:h1, 'Heading 1'], el[0]
|
39
|
+
assert_equal [:h2, 'Heading 2'], el[1]
|
40
|
+
assert_equal [:h3, 'Heading 3'], el[2]
|
41
|
+
assert_equal [:table, [['header1', 'header2', 'header3'],
|
42
|
+
['row1cell1', 'row1cell2', 'row1cell3'],
|
43
|
+
['row2cell1', 'row2cell2', 'row2cell3']]], el[3]
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_parse
|
47
|
+
page = Wikiscript::Page.new( 'Test', text: <<TXT )
|
48
|
+
=Heading 1==
|
49
|
+
==Heading 2==
|
50
|
+
===Heading 3===
|
51
|
+
|
52
|
+
{|
|
53
|
+
|-
|
54
|
+
! header1
|
55
|
+
! header2
|
56
|
+
! header3
|
57
|
+
|-
|
58
|
+
| row1cell1
|
59
|
+
| row1cell2
|
60
|
+
| row1cell3
|
61
|
+
|-
|
62
|
+
| row2cell1
|
63
|
+
| row2cell2
|
64
|
+
| row2cell3
|
65
|
+
|}
|
66
|
+
TXT
|
67
|
+
|
68
|
+
el = page.parse
|
69
|
+
pp el
|
70
|
+
|
71
|
+
assert_equal 4, el.size
|
72
|
+
assert_equal [:h1, 'Heading 1'], el[0]
|
73
|
+
assert_equal [:h2, 'Heading 2'], el[1]
|
74
|
+
assert_equal [:h3, 'Heading 3'], el[2]
|
75
|
+
assert_equal [:table, [['header1', 'header2', 'header3'],
|
76
|
+
['row1cell1', 'row1cell2', 'row1cell3'],
|
77
|
+
['row2cell1', 'row2cell2', 'row2cell3']]], el[3]
|
78
|
+
end
|
79
|
+
|
80
|
+
end # class TestPageReader
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_table_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestTableReader < MiniTest::Test
|
11
|
+
|
12
|
+
def test_basic
|
13
|
+
tables = Wikiscript::TableReader.parse( <<TXT )
|
14
|
+
{|
|
15
|
+
|-
|
16
|
+
! header1
|
17
|
+
! header2
|
18
|
+
! header3
|
19
|
+
|-
|
20
|
+
| row1cell1
|
21
|
+
| row1cell2
|
22
|
+
| row1cell3
|
23
|
+
|-
|
24
|
+
| row2cell1
|
25
|
+
| row2cell2
|
26
|
+
| row2cell3
|
27
|
+
|}
|
28
|
+
TXT
|
29
|
+
|
30
|
+
table = tables[0]
|
31
|
+
assert_equal 1, tables.size ## one table
|
32
|
+
assert_equal 3, table.size ## three rows
|
33
|
+
assert_equal ['header1', 'header2', 'header3'], table[0]
|
34
|
+
assert_equal ['row1cell1', 'row1cell2', 'row1cell3'], table[1]
|
35
|
+
assert_equal ['row2cell1', 'row2cell2', 'row2cell3'], table[2]
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_basic_ii ## with optional (missing) row divider before headers
|
39
|
+
tables = Wikiscript::TableReader.parse( <<TXT )
|
40
|
+
{|
|
41
|
+
! header1 !! header2 !! header3
|
42
|
+
|-
|
43
|
+
| row1cell1 || row1cell2 || row1cell3
|
44
|
+
|-
|
45
|
+
| row2cell1 || row2cell2 || row2cell3
|
46
|
+
|}
|
47
|
+
TXT
|
48
|
+
|
49
|
+
table = tables[0]
|
50
|
+
assert_equal 1, tables.size ## one table
|
51
|
+
assert_equal 3, table.size ## three rows
|
52
|
+
assert_equal ['header1', 'header2', 'header3'], table[0]
|
53
|
+
assert_equal ['row1cell1', 'row1cell2', 'row1cell3'], table[1]
|
54
|
+
assert_equal ['row2cell1', 'row2cell2', 'row2cell3'], table[2]
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_strip_attributes_and_emphases
|
58
|
+
tables = Wikiscript::TableReader.parse( <<TXT )
|
59
|
+
{|
|
60
|
+
|-
|
61
|
+
! style="width:200px;"|Club
|
62
|
+
! style="width:150px;"|City
|
63
|
+
|-
|
64
|
+
|[[Biu Chun Rangers]]||[[Sham Shui Po]]
|
65
|
+
|-
|
66
|
+
|bgcolor=#ffff44 |''[[Eastern Sports Club|Eastern]]''||[[Mong Kok]]
|
67
|
+
|-
|
68
|
+
|[[HKFC Soccer Section]]||[[Happy Valley, Hong Kong|Happy Valley]]
|
69
|
+
|}
|
70
|
+
TXT
|
71
|
+
|
72
|
+
table = tables[0]
|
73
|
+
assert_equal 1, tables.size ## one table
|
74
|
+
assert_equal 4, table.size ## four rows
|
75
|
+
assert_equal ['Club', 'City'], table[0]
|
76
|
+
assert_equal ['[[Biu Chun Rangers]]', '[[Sham Shui Po]]'], table[1]
|
77
|
+
assert_equal ['[[Eastern Sports Club|Eastern]]', '[[Mong Kok]]'], table[2]
|
78
|
+
assert_equal ['[[HKFC Soccer Section]]', '[[Happy Valley, Hong Kong|Happy Valley]]'], table[3]
|
79
|
+
end
|
80
|
+
|
81
|
+
end # class TestTableReader
|
metadata
CHANGED
@@ -1,108 +1,124 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wikiscript
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.2.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Gerald Bauer
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2019-09-20 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: logutils
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: fetcher
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - ">="
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
34
|
type: :runtime
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: rdoc
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- - ~>
|
45
|
+
- - "~>"
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '4.0'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '4.0'
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: hoe
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
|
-
- - ~>
|
59
|
+
- - "~>"
|
53
60
|
- !ruby/object:Gem::Version
|
54
|
-
version: '3.
|
61
|
+
version: '3.16'
|
55
62
|
type: :development
|
56
63
|
prerelease: false
|
57
|
-
version_requirements:
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.16'
|
58
69
|
description: wikiscript - scripts for wikipedia (get wikitext for page etc.)
|
59
70
|
email: opensport@googlegroups.com
|
60
71
|
executables: []
|
61
72
|
extensions: []
|
62
73
|
extra_rdoc_files:
|
63
|
-
-
|
74
|
+
- CHANGELOG.md
|
75
|
+
- LICENSE.md
|
64
76
|
- Manifest.txt
|
77
|
+
- NOTES.md
|
65
78
|
- README.md
|
66
79
|
files:
|
67
|
-
-
|
80
|
+
- CHANGELOG.md
|
81
|
+
- LICENSE.md
|
68
82
|
- Manifest.txt
|
83
|
+
- NOTES.md
|
69
84
|
- README.md
|
70
85
|
- Rakefile
|
71
86
|
- lib/wikiscript.rb
|
72
87
|
- lib/wikiscript/client.rb
|
73
88
|
- lib/wikiscript/page.rb
|
89
|
+
- lib/wikiscript/page_reader.rb
|
90
|
+
- lib/wikiscript/table_reader.rb
|
74
91
|
- lib/wikiscript/version.rb
|
75
92
|
- test/helper.rb
|
93
|
+
- test/test_link.rb
|
76
94
|
- test/test_page.rb
|
77
95
|
- test/test_page_de.rb
|
78
|
-
- .
|
79
|
-
|
96
|
+
- test/test_page_reader.rb
|
97
|
+
- test/test_table_reader.rb
|
98
|
+
homepage: https://github.com/wikiscript/wikiscript
|
80
99
|
licenses:
|
81
100
|
- Public Domain
|
101
|
+
metadata: {}
|
82
102
|
post_install_message:
|
83
103
|
rdoc_options:
|
84
|
-
- --main
|
104
|
+
- "--main"
|
85
105
|
- README.md
|
86
106
|
require_paths:
|
87
107
|
- lib
|
88
108
|
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
109
|
requirements:
|
91
|
-
- -
|
110
|
+
- - ">="
|
92
111
|
- !ruby/object:Gem::Version
|
93
|
-
version:
|
112
|
+
version: 2.2.2
|
94
113
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
-
none: false
|
96
114
|
requirements:
|
97
|
-
- -
|
115
|
+
- - ">="
|
98
116
|
- !ruby/object:Gem::Version
|
99
117
|
version: '0'
|
100
118
|
requirements: []
|
101
119
|
rubyforge_project:
|
102
|
-
rubygems_version:
|
120
|
+
rubygems_version: 2.5.2
|
103
121
|
signing_key:
|
104
|
-
specification_version:
|
122
|
+
specification_version: 4
|
105
123
|
summary: wikiscript - scripts for wikipedia (get wikitext for page etc.)
|
106
|
-
test_files:
|
107
|
-
- test/test_page_de.rb
|
108
|
-
- test/test_page.rb
|
124
|
+
test_files: []
|
data/.gemtest
DELETED
File without changes
|