wikiscript 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: a34183c24ce2eac79cf72edcec39c562e6c74065
4
- data.tar.gz: 56fff65e58fc1fbbbc2eeaa111d22d6ebce84f63
2
+ SHA256:
3
+ metadata.gz: 9c6ea507fa295074d82e5d81eb88828c4473b35e5aee7f04b79cfbc774bb9e72
4
+ data.tar.gz: 56a5e69536495a90e0c1635799c8023ca57fdfbc554f818d40f86046972f13fb
5
5
  SHA512:
6
- metadata.gz: 85fa8e16dffbdfdaf6b683dae530a692eb6c570a03345ddf2a013b248bbb51aaab9ce495850bdd564a163e35c37f0f80b3f6d47b5ee994e8effe02c413ac8ba8
7
- data.tar.gz: ca8d9133a251f634db722575100d260c79859437fb393d9ac5b8181d0725f7d54a5290b4dfa0436a13aa73338c818adeea5c89c06b03b2f2e7360aa102f3e1c4
6
+ metadata.gz: 5d0bc2f5ec90d5a3c3f6c0c72c145f33a4faf1acccb28a3487059c715a8c60b5a5ed43fdd54318fe30669b1f64601a48eab0bfbba1f30da494f0b89a526cf054
7
+ data.tar.gz: ad591ed36382240029ddf9cf3e120fb938d3a253b8766cf04e0875b222b2349ee7473a1b0c851ee337afcd501d8830cea1da39d30eb78e60f9aa539d33408a59
data/CHANGELOG.md CHANGED
@@ -1,3 +1,5 @@
1
+ ### 0.4.0
2
+
1
3
  ### 0.0.1 / 2014-07-03
2
4
 
3
5
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -1,18 +1,11 @@
1
1
  CHANGELOG.md
2
- LICENSE.md
3
2
  Manifest.txt
4
- NOTES.md
5
3
  README.md
6
4
  Rakefile
7
5
  lib/wikiscript.rb
8
6
  lib/wikiscript/client.rb
7
+ lib/wikiscript/outline_reader.rb
9
8
  lib/wikiscript/page.rb
10
9
  lib/wikiscript/page_reader.rb
11
10
  lib/wikiscript/table_reader.rb
12
11
  lib/wikiscript/version.rb
13
- test/helper.rb
14
- test/test_link.rb
15
- test/test_page.rb
16
- test/test_page_de.rb
17
- test/test_page_reader.rb
18
- test/test_table_reader.rb
data/Rakefile CHANGED
@@ -8,23 +8,23 @@ Hoe.spec 'wikiscript' do
8
8
  self.summary = "wikiscript - scripts for wikipedia (get wikitext for page, parse tables 'n' links, etc.)"
9
9
  self.description = summary
10
10
 
11
- self.urls = ['https://github.com/wikiscript/wikiscript']
11
+ self.urls = { home: 'https://github.com/wikiscript/wikiscript' }
12
12
 
13
13
  self.author = 'Gerald Bauer'
14
- self.email = 'opensport@googlegroups.com'
14
+ self.email = 'gerald.bauer@gmail.com'
15
15
 
16
16
  # switch extension to .markdown for github formatting
17
17
  self.readme_file = 'README.md'
18
18
  self.history_file = 'CHANGELOG.md'
19
19
 
20
20
  self.extra_deps = [
21
+ ['cocos'],
21
22
  ['logutils' ],
22
- ['fetcher']
23
23
  ]
24
24
 
25
25
  self.licenses = ['Public Domain']
26
26
 
27
27
  self.spec_extras = {
28
- required_ruby_version: '>= 2.2.2'
28
+ required_ruby_version: '>= 3.1.0'
29
29
  }
30
30
  end
@@ -1,19 +1,13 @@
1
- # encoding: utf-8
2
1
 
3
2
  module Wikiscript
4
3
 
5
4
  class Client
5
+ include Logging
6
6
 
7
- include LogUtils::Logging
8
-
9
- SITE_BASE = 'http://{lang}.wikipedia.org/w/index.php'
7
+ SITE_BASE = 'https://{lang}.wikipedia.org/w/index.php'
10
8
 
11
9
  ### API_BASE = 'http://en.wikipedia.org/w/api.php'
12
10
 
13
- def initialize( opts={} )
14
- @opts = opts
15
- @worker = Fetcher::Worker.new
16
- end
17
11
 
18
12
  ## change to: wikitext or raw why? why not? or to raw? why? why not?
19
13
  def text( title, lang: Wikiscript.lang )
@@ -24,7 +18,7 @@ module Wikiscript
24
18
  # becomes
25
19
  # http://en.wikipedia.org/w/index.php or
26
20
  # http://de.wikipedia.org/w/index.php etc
27
- base_url = SITE_BASE.gsub( "{lang}", lang )
21
+ base_url = SITE_BASE.sub( "{lang}", lang )
28
22
  params = { action: 'raw',
29
23
  title: title }
30
24
 
@@ -33,6 +27,10 @@ module Wikiscript
33
27
 
34
28
  private
35
29
  def build_query( h )
30
+
31
+ ## todo/fix - check what to use for params encode
32
+ ## e.g. escape_component or such?
33
+ ## fix add params upstream to webclient - why? why not?
36
34
  h.map do |k,v|
37
35
  "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
38
36
  end.join( '&' )
@@ -48,27 +46,12 @@ private
48
46
  ## fix: pass in uri (add to fetcher check for is_a? URI etc.)
49
47
  uri_string = "#{base_url}?#{query}"
50
48
 
51
- response = @worker.get_response( uri_string )
52
-
53
- if response.code == '200'
54
- t = response.body
55
- ###
56
- # NB: Net::HTTP will NOT set encoding UTF-8 etc.
57
- # will mostly be ASCII
58
- # - try to change encoding to UTF-8 ourselves
59
- logger.debug "t.encoding.name (before): #{t.encoding.name}"
60
- #####
61
- # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
49
+ response = Webclient.get( uri_string )
62
50
 
63
- ## NB:
64
- # for now "hardcoded" to utf8 - what else can we do?
65
- # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
66
- t = t.force_encoding( Encoding::UTF_8 )
67
- logger.debug "t.encoding.name (after): #{t.encoding.name}"
68
- ## pp t
69
- t
51
+ if response.status.ok?
52
+ response.text
70
53
  else
71
- logger.error "fetch HTTP - #{response.code} #{response.message}"
54
+ logger.error "HTTP ERROR - #{response.status.code} #{response.status.message}"
72
55
  exit 1 ### exit for now on error - why? why not?
73
56
  ## nil
74
57
  end
@@ -0,0 +1,97 @@
1
+
2
+ module Wikiscript
3
+
4
+ class OutlineReader
5
+
6
+ def self.read( path )
7
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
8
+ parse( txt )
9
+ end
10
+
11
+ def self.parse( txt )
12
+ new( txt ).parse
13
+ end
14
+
15
+ def initialize( txt )
16
+ @txt = txt
17
+ end
18
+
19
+
20
+ HEADING_RE = %r{\A
21
+ (?<marker>={1,}) ## 1. leading ======
22
+ [ ]*
23
+ (?<text>[^=]+?) ## 2. text (note: for now no "inline" = allowed)
24
+ [ ]*
25
+ =* ## 3. (optional) trailing ====
26
+ \z}x
27
+
28
+ def parse
29
+ outline = [] ## outline / page structure
30
+
31
+ start_para = true ## start new para(graph) on new text line?
32
+
33
+ @txt.each_line do |line|
34
+
35
+ ##
36
+ ## (auto-)sanitize first
37
+ ## - &nbsp; => "vanilla" space
38
+ ## - 1–2 => 1-2 - "vanilla" dash
39
+ ## todo - move up into txt!!!
40
+ line = line.gsub( '&nbsp;', ' ' )
41
+ line = line.gsub( /[–]/, '-' )
42
+
43
+
44
+ line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
45
+
46
+ if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
47
+ start_para = true
48
+ next
49
+ end
50
+
51
+ break if line == '__END__'
52
+
53
+ next if line.start_with?( '#' ) ## skip comments too
54
+ ## strip inline (until end-of-line) comments too
55
+ ## e.g Eupen | KAS Eupen ## [de]
56
+ ## => Eupen | KAS Eupen
57
+ ## e.g bq Bonaire, BOE # CONCACAF
58
+ ## => bq Bonaire, BOE
59
+ line = line.sub( /#.*/, '' ).strip
60
+
61
+ ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
62
+ if m=HEADING_RE.match( line )
63
+ start_para = true
64
+
65
+ heading_marker = m[:marker]
66
+ heading_level = heading_marker.length ## count number of = for heading level
67
+ heading = m[:text].strip
68
+
69
+ outline << [:"h#{heading_level}", heading]
70
+ elsif line == '----' ## make more generic/flexible - why? why not?
71
+ start_para = true
72
+ outline << [:hr]
73
+ ## The horizontal rule represents a paragraph-level thematic break. Do not use in article content,
74
+ ## as rules are used only after main sections, and this is automatic.
75
+ ## HTML equivalent: <hr /> (which can be indented,
76
+ ## whereas ---- always starts at the left margin.)
77
+ else ## assume it's a (plain/regular) text line
78
+ if start_para
79
+ outline << [:p, [line]]
80
+ start_para = false
81
+ else
82
+ node = outline[-1] ## get last entry
83
+ if node[0] == :p ## assert it's a p(aragraph) node!!!
84
+ node[1] << line ## add line to p(aragraph)
85
+ else
86
+ puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
87
+ pp node
88
+ exit 1
89
+ end
90
+ end
91
+ end
92
+ end
93
+ outline
94
+ end # method parse
95
+ end # class OutlineReader
96
+
97
+ end # module Wikiscript
@@ -1,10 +1,9 @@
1
- # encoding: utf-8
2
1
 
3
2
  module Wikiscript
4
3
 
5
4
  class Page
6
5
 
7
- include LogUtils::Logging
6
+ include Logging
8
7
 
9
8
  attr_reader :title, :lang
10
9
 
@@ -16,7 +15,7 @@ module Wikiscript
16
15
  end
17
16
 
18
17
  def self.read( path )
19
- text = File.open( path, 'r:utf-8' ).read
18
+ text = File.open( path, 'r:utf-8' ) { |f| f.read }
20
19
  o = new( text, title: "File:#{path}" ) ## use auto-generated File:<path> title path - why? why not?
21
20
  o
22
21
  end
@@ -1,11 +1,10 @@
1
- # encoding: utf-8
2
1
 
3
2
  module Wikiscript
4
3
 
5
4
  class PageReader
6
5
 
7
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
8
- txt = File.open( path, 'r:utf-8' ).read
6
+ def self.read( path )
7
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
9
8
  parse( txt )
10
9
  end
11
10
 
@@ -54,7 +53,7 @@ class PageReader
54
53
  table_txt << line << "\n"
55
54
  else
56
55
  ## note: skip unknown line types for now
57
-
56
+
58
57
  ## puts "** !!! ERROR !!! unknown line type in wiki page:"
59
58
  ## pp line
60
59
  ## exit 1
@@ -1,11 +1,10 @@
1
- # encoding: utf-8
2
1
 
3
2
  module Wikiscript
4
3
 
5
4
  class TableReader
6
5
 
7
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
8
- txt = File.open( path, 'r:utf-8' ).read
6
+ def self.read( path )
7
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
9
8
  parse( txt )
10
9
  end
11
10
 
@@ -1,12 +1,12 @@
1
1
 
2
2
  module Wikiscript
3
- VERSION = '0.3.2'
3
+ VERSION = '0.4.0'
4
4
 
5
5
  def self.banner
6
- "wikiscript/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
6
+ "wikiscript/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
7
7
  end
8
8
 
9
9
  def self.root
10
- "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
10
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
11
11
  end
12
12
  end
data/lib/wikiscript.rb CHANGED
@@ -1,26 +1,25 @@
1
- # encoding: utf-8
2
-
3
- ## stdlibs
4
-
5
- require 'net/http'
6
- require 'uri'
7
- require 'cgi'
8
- require 'pp'
1
+ ## stdlibs via cocos
2
+ require 'cocos'
9
3
 
10
4
 
11
5
  ## 3rd party gems/libs
12
6
  ## require 'props'
13
7
 
14
8
  require 'logutils'
15
- require 'fetcher'
16
9
 
17
- # our own code
10
+ module Wikiscript
11
+ Logging = LogUtils::Logging
12
+ end
18
13
 
19
- require 'wikiscript/version' # let it always go first
20
- require 'wikiscript/client'
21
- require 'wikiscript/table_reader'
22
- require 'wikiscript/page_reader'
23
- require 'wikiscript/page'
14
+
15
+
16
+ # our own code
17
+ require_relative 'wikiscript/version' # let it always go first
18
+ require_relative 'wikiscript/client'
19
+ require_relative 'wikiscript/table_reader'
20
+ require_relative 'wikiscript/page_reader'
21
+ require_relative 'wikiscript/outline_reader'
22
+ require_relative 'wikiscript/page'
24
23
 
25
24
 
26
25
 
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wikiscript
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-22 00:00:00.000000000 Z
11
+ date: 2024-09-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: logutils
14
+ name: cocos
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: fetcher
28
+ name: logutils
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -42,65 +42,62 @@ dependencies:
42
42
  name: rdoc
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '4.0'
48
+ - - "<"
49
+ - !ruby/object:Gem::Version
50
+ version: '7'
48
51
  type: :development
49
52
  prerelease: false
50
53
  version_requirements: !ruby/object:Gem::Requirement
51
54
  requirements:
52
- - - "~>"
55
+ - - ">="
53
56
  - !ruby/object:Gem::Version
54
57
  version: '4.0'
58
+ - - "<"
59
+ - !ruby/object:Gem::Version
60
+ version: '7'
55
61
  - !ruby/object:Gem::Dependency
56
62
  name: hoe
57
63
  requirement: !ruby/object:Gem::Requirement
58
64
  requirements:
59
65
  - - "~>"
60
66
  - !ruby/object:Gem::Version
61
- version: '3.16'
67
+ version: '4.1'
62
68
  type: :development
63
69
  prerelease: false
64
70
  version_requirements: !ruby/object:Gem::Requirement
65
71
  requirements:
66
72
  - - "~>"
67
73
  - !ruby/object:Gem::Version
68
- version: '3.16'
74
+ version: '4.1'
69
75
  description: wikiscript - scripts for wikipedia (get wikitext for page, parse tables
70
76
  'n' links, etc.)
71
- email: opensport@googlegroups.com
77
+ email: gerald.bauer@gmail.com
72
78
  executables: []
73
79
  extensions: []
74
80
  extra_rdoc_files:
75
81
  - CHANGELOG.md
76
- - LICENSE.md
77
82
  - Manifest.txt
78
- - NOTES.md
79
83
  - README.md
80
84
  files:
81
85
  - CHANGELOG.md
82
- - LICENSE.md
83
86
  - Manifest.txt
84
- - NOTES.md
85
87
  - README.md
86
88
  - Rakefile
87
89
  - lib/wikiscript.rb
88
90
  - lib/wikiscript/client.rb
91
+ - lib/wikiscript/outline_reader.rb
89
92
  - lib/wikiscript/page.rb
90
93
  - lib/wikiscript/page_reader.rb
91
94
  - lib/wikiscript/table_reader.rb
92
95
  - lib/wikiscript/version.rb
93
- - test/helper.rb
94
- - test/test_link.rb
95
- - test/test_page.rb
96
- - test/test_page_de.rb
97
- - test/test_page_reader.rb
98
- - test/test_table_reader.rb
99
96
  homepage: https://github.com/wikiscript/wikiscript
100
97
  licenses:
101
98
  - Public Domain
102
99
  metadata: {}
103
- post_install_message:
100
+ post_install_message:
104
101
  rdoc_options:
105
102
  - "--main"
106
103
  - README.md
@@ -110,16 +107,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
110
107
  requirements:
111
108
  - - ">="
112
109
  - !ruby/object:Gem::Version
113
- version: 2.2.2
110
+ version: 3.1.0
114
111
  required_rubygems_version: !ruby/object:Gem::Requirement
115
112
  requirements:
116
113
  - - ">="
117
114
  - !ruby/object:Gem::Version
118
115
  version: '0'
119
116
  requirements: []
120
- rubyforge_project:
121
- rubygems_version: 2.5.2
122
- signing_key:
117
+ rubygems_version: 3.4.10
118
+ signing_key:
123
119
  specification_version: 4
124
120
  summary: wikiscript - scripts for wikipedia (get wikitext for page, parse tables 'n'
125
121
  links, etc.)
data/LICENSE.md DELETED
@@ -1,116 +0,0 @@
1
- CC0 1.0 Universal
2
-
3
- Statement of Purpose
4
-
5
- The laws of most jurisdictions throughout the world automatically confer
6
- exclusive Copyright and Related Rights (defined below) upon the creator and
7
- subsequent owner(s) (each and all, an "owner") of an original work of
8
- authorship and/or a database (each, a "Work").
9
-
10
- Certain owners wish to permanently relinquish those rights to a Work for the
11
- purpose of contributing to a commons of creative, cultural and scientific
12
- works ("Commons") that the public can reliably and without fear of later
13
- claims of infringement build upon, modify, incorporate in other works, reuse
14
- and redistribute as freely as possible in any form whatsoever and for any
15
- purposes, including without limitation commercial purposes. These owners may
16
- contribute to the Commons to promote the ideal of a free culture and the
17
- further production of creative, cultural and scientific works, or to gain
18
- reputation or greater distribution for their Work in part through the use and
19
- efforts of others.
20
-
21
- For these and/or other purposes and motivations, and without any expectation
22
- of additional consideration or compensation, the person associating CC0 with a
23
- Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
24
- and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
25
- and publicly distribute the Work under its terms, with knowledge of his or her
26
- Copyright and Related Rights in the Work and the meaning and intended legal
27
- effect of CC0 on those rights.
28
-
29
- 1. Copyright and Related Rights. A Work made available under CC0 may be
30
- protected by copyright and related or neighboring rights ("Copyright and
31
- Related Rights"). Copyright and Related Rights include, but are not limited
32
- to, the following:
33
-
34
- i. the right to reproduce, adapt, distribute, perform, display, communicate,
35
- and translate a Work;
36
-
37
- ii. moral rights retained by the original author(s) and/or performer(s);
38
-
39
- iii. publicity and privacy rights pertaining to a person's image or likeness
40
- depicted in a Work;
41
-
42
- iv. rights protecting against unfair competition in regards to a Work,
43
- subject to the limitations in paragraph 4(a), below;
44
-
45
- v. rights protecting the extraction, dissemination, use and reuse of data in
46
- a Work;
47
-
48
- vi. database rights (such as those arising under Directive 96/9/EC of the
49
- European Parliament and of the Council of 11 March 1996 on the legal
50
- protection of databases, and under any national implementation thereof,
51
- including any amended or successor version of such directive); and
52
-
53
- vii. other similar, equivalent or corresponding rights throughout the world
54
- based on applicable law or treaty, and any national implementations thereof.
55
-
56
- 2. Waiver. To the greatest extent permitted by, but not in contravention of,
57
- applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
58
- unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
59
- and Related Rights and associated claims and causes of action, whether now
60
- known or unknown (including existing as well as future claims and causes of
61
- action), in the Work (i) in all territories worldwide, (ii) for the maximum
62
- duration provided by applicable law or treaty (including future time
63
- extensions), (iii) in any current or future medium and for any number of
64
- copies, and (iv) for any purpose whatsoever, including without limitation
65
- commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
66
- the Waiver for the benefit of each member of the public at large and to the
67
- detriment of Affirmer's heirs and successors, fully intending that such Waiver
68
- shall not be subject to revocation, rescission, cancellation, termination, or
69
- any other legal or equitable action to disrupt the quiet enjoyment of the Work
70
- by the public as contemplated by Affirmer's express Statement of Purpose.
71
-
72
- 3. Public License Fallback. Should any part of the Waiver for any reason be
73
- judged legally invalid or ineffective under applicable law, then the Waiver
74
- shall be preserved to the maximum extent permitted taking into account
75
- Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
76
- is so judged Affirmer hereby grants to each affected person a royalty-free,
77
- non transferable, non sublicensable, non exclusive, irrevocable and
78
- unconditional license to exercise Affirmer's Copyright and Related Rights in
79
- the Work (i) in all territories worldwide, (ii) for the maximum duration
80
- provided by applicable law or treaty (including future time extensions), (iii)
81
- in any current or future medium and for any number of copies, and (iv) for any
82
- purpose whatsoever, including without limitation commercial, advertising or
83
- promotional purposes (the "License"). The License shall be deemed effective as
84
- of the date CC0 was applied by Affirmer to the Work. Should any part of the
85
- License for any reason be judged legally invalid or ineffective under
86
- applicable law, such partial invalidity or ineffectiveness shall not
87
- invalidate the remainder of the License, and in such case Affirmer hereby
88
- affirms that he or she will not (i) exercise any of his or her remaining
89
- Copyright and Related Rights in the Work or (ii) assert any associated claims
90
- and causes of action with respect to the Work, in either case contrary to
91
- Affirmer's express Statement of Purpose.
92
-
93
- 4. Limitations and Disclaimers.
94
-
95
- a. No trademark or patent rights held by Affirmer are waived, abandoned,
96
- surrendered, licensed or otherwise affected by this document.
97
-
98
- b. Affirmer offers the Work as-is and makes no representations or warranties
99
- of any kind concerning the Work, express, implied, statutory or otherwise,
100
- including without limitation warranties of title, merchantability, fitness
101
- for a particular purpose, non infringement, or the absence of latent or
102
- other defects, accuracy, or the present or absence of errors, whether or not
103
- discoverable, all to the greatest extent permissible under applicable law.
104
-
105
- c. Affirmer disclaims responsibility for clearing rights of other persons
106
- that may apply to the Work or any use thereof, including without limitation
107
- any person's Copyright and Related Rights in the Work. Further, Affirmer
108
- disclaims responsibility for obtaining any necessary consents, permissions
109
- or other rights required for any use of the Work.
110
-
111
- d. Affirmer understands and acknowledges that Creative Commons is not a
112
- party to this document and has no duty or obligation with respect to this
113
- CC0 or use of the Work.
114
-
115
- For more information, please see
116
- <http://creativecommons.org/publicdomain/zero/1.0/>
data/NOTES.md DELETED
@@ -1,52 +0,0 @@
1
- # Notes
2
-
3
-
4
- ## Alternatives
5
-
6
- Wikipedia
7
-
8
- - [wikipedia-client](https://rubygems.org/gems/wikipedia-client) by Ken Pratt et al - ruby client for the Wikipedia API
9
- - <https://github.com/kenpratt/wikipedia-client>
10
- - <https://www.rubydoc.info/gems/wikipedia-client>
11
-
12
- <!-- break -->
13
-
14
- - [infoboxer](https://rubygems.org/gems/infoboxer) by Victor Shepelev et al - pure-Ruby Wikipedia (and generic MediaWiki) client and parser, targeting information extraction
15
- - <https://github.com/molybdenum-99/infoboxer>
16
- - <https://www.rubydoc.info/gems/infoboxer>
17
-
18
- <!-- break -->
19
-
20
- More
21
-
22
- - <https://github.com/molybdenum-99/reality>
23
- - https://github.com/molybdenum-99/mediawiktory
24
-
25
-
26
- Wikidata
27
-
28
- - [wikidata](https://rubygems.org/gems/wikidata) by Wil Gieseler
29
- - <https://github.com/wilg/wikidata>
30
- - <https://www.rubydoc.info/gems/wikidata>
31
-
32
- <!-- break -->
33
-
34
- - [wikidata-fetcher](https://rubygems.org/gems/wikidata-fetcher)
35
- - <https://github.com/everypolitician/wikidata-fetcher>
36
-
37
- <!-- break -->
38
-
39
- - [mediawiki_api-wikidata](https://rubygems.org/gems/mediawiki_api-wikidata)
40
- - <https://github.com/wmde/WikidataApiGem>
41
-
42
-
43
-
44
- **Python**
45
-
46
- - <https://pypi.org/project/wptools/> - Wikipedia tools (for Humans)
47
- - <https://github.com/siznax/wptools/>
48
-
49
-
50
- ## Wikipedia
51
-
52
- - Wikipedia API reference: <http://en.wikipedia.org/w/api.php>
data/test/helper.rb DELETED
@@ -1,8 +0,0 @@
1
- ## $:.unshift(File.dirname(__FILE__))
2
-
3
- ## minitest setup
4
- require 'minitest/autorun'
5
-
6
-
7
- ## our own code
8
- require 'wikiscript'
data/test/test_link.rb DELETED
@@ -1,31 +0,0 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_link.rb
6
-
7
-
8
- require 'helper'
9
-
10
-
11
- class TestLink < MiniTest::Test
12
-
13
- def test_unlink
14
- assert_equal 'Santiago (La Florida)', Wikiscript.unlink( '[[Santiago]] ([[La Florida, Chile|La Florida]])' )
15
- end
16
-
17
- def test_parse_link
18
- link, title = Wikiscript.parse_link( '[[La Florida, Chile|La Florida]]' )
19
- assert_equal 'La Florida, Chile', link
20
- assert_equal 'La Florida', title
21
-
22
- link, title = Wikiscript.parse_link( '[[ La Florida, Chile | La Florida ]]' )
23
- assert_equal 'La Florida, Chile', link
24
- assert_equal 'La Florida', title
25
-
26
- link, title = Wikiscript.parse_link( 'La Florida' )
27
- assert link == nil
28
- assert title == nil
29
- end
30
-
31
- end # class TestLink
data/test/test_page.rb DELETED
@@ -1,54 +0,0 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_page.rb
6
-
7
-
8
- require 'helper'
9
-
10
-
11
- class TestPage < MiniTest::Test
12
-
13
- def setup
14
- Wikiscript.lang = :en
15
- end
16
-
17
- def test_austria_en
18
- page = Wikiscript::Page.get( 'Austria' )
19
- # [debug] GET /w/index.php?action=raw&title=Austria uri=http://en.wikipedia.org/w/index.php?action=raw&title=Austria, redirect_limit=5
20
- # [debug] 301 TLS Redirect location=https://en.wikipedia.org/w/index.php?action=raw&title=Austria
21
- # [debug] GET /w/index.php?action=raw&title=Austria uri=https://en.wikipedia.org/w/index.php?action=raw&title=Austria, redirect_limit=4
22
- # [debug] 200 OK
23
-
24
- text = page.text
25
-
26
- ## print first 600 chars
27
- pp text[0..600]
28
-
29
- ## check for some snippets
30
- assert /{{Infobox country/ =~ text
31
- assert /common_name = Austria/ =~ text
32
- assert /capital = \[\[Vienna\]\]/ =~ text
33
- # assert /The origins of modern-day Austria date back to the time/ =~ text
34
- end
35
-
36
- def test_sankt_poelten_en
37
- page = Wikiscript::Page.get( 'Sankt_Pölten' )
38
- # [debug] GET /w/index.php?action=raw&title=Sankt_P%C3%B6lten uri=http://en.wikipedia.org/w/index.php?action=raw&title=Sankt_P%C3%B6lten, redirect_limit=5
39
- # [debug] 301 TLS Redirect location=https://en.wikipedia.org/w/index.php?action=raw&title=Sankt_P%C3%B6lten
40
- # [debug] GET /w/index.php?action=raw&title=Sankt_P%C3%B6lten uri=https://en.wikipedia.org/w/index.php?action=raw&title=Sankt_P%C3%B6lten, redirect_limit=4
41
- # [debug] 200 OK
42
-
43
- text = page.text
44
-
45
- ## print first 600 chars
46
- pp text[0..600]
47
-
48
- ## check for some snippets
49
- assert /{{Infobox settlement/ =~ text
50
- assert /name\s+=\s+Sankt Pölten/ =~ text
51
- # assert /'''Sankt Pölten''' \(''St. Pölten''\) is the capital city of/ =~ text
52
- end
53
-
54
- end # class TestPage
data/test/test_page_de.rb DELETED
@@ -1,38 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- ###
5
- # to run use
6
- # ruby -I ./lib -I ./test test/test_page_de.rb
7
-
8
-
9
- require 'helper'
10
-
11
-
12
- class TestPageDe < MiniTest::Test
13
-
14
- def setup
15
- Wikiscript.lang = :de
16
- end
17
-
18
- def test_st_poelten_de
19
- page = Wikiscript::Page.get( 'St._Pölten' )
20
- # [debug] GET /w/index.php?action=raw&title=St._P%C3%B6lten uri=http://de.wikipedia.org/w/index.php?action=raw&title=St._P%C3%B6lten, redirect_limit=5
21
- # [debug] 301 TLS Redirect location=https://de.wikipedia.org/w/index.php?action=raw&title=St._P%C3%B6lten
22
- # [debug] GET /w/index.php?action=raw&title=St._P%C3%B6lten uri=https://de.wikipedia.org/w/index.php?action=raw&title=St._P%C3%B6lten, redirect_limit=4
23
- # [debug] 200 OK
24
-
25
-
26
- text = page.text
27
-
28
- ## print first 600 chars
29
- pp text[0..600]
30
-
31
- ## check for some snippets
32
- assert /{{Infobox Gemeinde in Österreich/ =~ text
33
- assert /Name\s+=\s+St\. Pölten/ =~ text
34
- assert /'''St\. Pölten''' \(amtlicher Name,/ =~ text
35
- ## assert /Die Stadt liegt am Fluss \[\[Traisen \(Fluss\)\|Traisen\]\]/ =~ text
36
- end
37
-
38
- end # class TestPageDe
@@ -1,117 +0,0 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_page_reader.rb
6
-
7
-
8
- require 'helper'
9
-
10
-
11
- class TestPageReader < MiniTest::Test
12
-
13
- def test_basic
14
- nodes = Wikiscript.parse( <<TXT )
15
- =Heading 1==
16
- ==Heading 2==
17
- ===Heading 3===
18
-
19
- {|
20
- |-
21
- ! header1
22
- ! header2
23
- ! header3
24
- |-
25
- | row1cell1
26
- | row1cell2
27
- | row1cell3
28
- |-
29
- | row2cell1
30
- | row2cell2
31
- | row2cell3
32
- |}
33
- TXT
34
-
35
- pp nodes
36
-
37
- assert_equal 4, nodes.size
38
- assert_equal [:h1, 'Heading 1'], nodes[0]
39
- assert_equal [:h2, 'Heading 2'], nodes[1]
40
- assert_equal [:h3, 'Heading 3'], nodes[2]
41
- assert_equal [:table, [['header1', 'header2', 'header3'],
42
- ['row1cell1', 'row1cell2', 'row1cell3'],
43
- ['row2cell1', 'row2cell2', 'row2cell3']]], nodes[3]
44
- end
45
-
46
- def test_parse
47
- page = Wikiscript::Page.new( <<TXT )
48
- =Heading 1==
49
- ==Heading 2==
50
- ===Heading 3===
51
-
52
- {|
53
- |-
54
- ! header1
55
- ! header2
56
- ! header3
57
- |-
58
- | row1cell1
59
- | row1cell2
60
- | row1cell3
61
- |-
62
- | row2cell1
63
- | row2cell2
64
- | row2cell3
65
- |}
66
- TXT
67
-
68
- nodes = page.parse
69
- pp nodes
70
-
71
- assert_equal 4, nodes.size
72
- assert_equal [:h1, 'Heading 1'], nodes[0]
73
- assert_equal [:h2, 'Heading 2'], nodes[1]
74
- assert_equal [:h3, 'Heading 3'], nodes[2]
75
- assert_equal [:table, [['header1', 'header2', 'header3'],
76
- ['row1cell1', 'row1cell2', 'row1cell3'],
77
- ['row2cell1', 'row2cell2', 'row2cell3']]], nodes[3]
78
- end
79
-
80
- def test_each
81
- page = Wikiscript::Page.new( <<TXT )
82
- =Heading 1==
83
- ==Heading 2==
84
- ===Heading 3===
85
-
86
- {|
87
- |-
88
- ! header1
89
- ! header2
90
- ! header3
91
- |-
92
- | row1cell1
93
- | row1cell2
94
- | row1cell3
95
- |-
96
- | row2cell1
97
- | row2cell2
98
- | row2cell3
99
- |}
100
- TXT
101
-
102
- nodes = []
103
- page.each do |node|
104
- nodes << node
105
- end
106
- pp nodes
107
-
108
- assert_equal 4, nodes.size
109
- assert_equal [:h1, 'Heading 1'], nodes[0]
110
- assert_equal [:h2, 'Heading 2'], nodes[1]
111
- assert_equal [:h3, 'Heading 3'], nodes[2]
112
- assert_equal [:table, [['header1', 'header2', 'header3'],
113
- ['row1cell1', 'row1cell2', 'row1cell3'],
114
- ['row2cell1', 'row2cell2', 'row2cell3']]], nodes[3]
115
- end
116
-
117
- end # class TestPageReader
@@ -1,109 +0,0 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_table_reader.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestTableReader < MiniTest::Test
11
-
12
- def test_basic
13
- table = Wikiscript.parse_table( <<TXT )
14
- {|
15
- |-
16
- ! header1
17
- ! header2
18
- ! header3
19
- |-
20
- | row1cell1
21
- | row1cell2
22
- | row1cell3
23
- |-
24
- | row2cell1
25
- | row2cell2
26
- | row2cell3
27
- |}
28
- TXT
29
-
30
- assert_equal 3, table.size ## three rows
31
- assert_equal ['header1', 'header2', 'header3'], table[0]
32
- assert_equal ['row1cell1', 'row1cell2', 'row1cell3'], table[1]
33
- assert_equal ['row2cell1', 'row2cell2', 'row2cell3'], table[2]
34
- end
35
-
36
- def test_basic_ii ## with optional (missing) row divider before headers
37
- table = Wikiscript.parse_table( <<TXT )
38
- {|
39
- ! header1 !! header2 !! header3
40
- |-
41
- | row1cell1 || row1cell2 || row1cell3
42
- |-
43
- | row2cell1 || row2cell2 || row2cell3
44
- |}
45
- TXT
46
-
47
- assert_equal 3, table.size ## three rows
48
- assert_equal ['header1', 'header2', 'header3'], table[0]
49
- assert_equal ['row1cell1', 'row1cell2', 'row1cell3'], table[1]
50
- assert_equal ['row2cell1', 'row2cell2', 'row2cell3'], table[2]
51
- end
52
-
53
- def test_basic_iii # with continuing header column lines
54
- table = Wikiscript.parse_table( <<TXT )
55
- {|
56
- |-
57
- !
58
- header1
59
- !
60
- header2
61
- !
62
- header3
63
- |-
64
- |
65
- row1cell1
66
- |
67
- row1cell2
68
- |
69
- row1cell3
70
- |-
71
- |
72
- row2cell1
73
- |
74
- row2cell2
75
- |
76
- row2cell3
77
- |}
78
- TXT
79
-
80
- assert_equal 3, table.size ## three rows
81
- assert_equal ['header1', 'header2', 'header3'], table[0]
82
- assert_equal ['row1cell1', 'row1cell2', 'row1cell3'], table[1]
83
- assert_equal ['row2cell1', 'row2cell2', 'row2cell3'], table[2]
84
- end
85
-
86
-
87
- def test_strip_attributes_and_emphases
88
- table = Wikiscript.parse_table( <<TXT )
89
- {|
90
- |-
91
- ! style="width:200px;"|Club
92
- ! style="width:150px;"|City
93
- |-
94
- |[[Biu Chun Rangers]]||[[Sham Shui Po]]
95
- |-
96
- |bgcolor=#ffff44 |''[[Eastern Sports Club|Eastern]]''||[[Mong Kok]]
97
- |-
98
- |[[HKFC Soccer Section]]||[[Happy Valley, Hong Kong|Happy Valley]]
99
- |}
100
- TXT
101
-
102
- assert_equal 4, table.size ## four rows
103
- assert_equal ['Club', 'City'], table[0]
104
- assert_equal ['[[Biu Chun Rangers]]', '[[Sham Shui Po]]'], table[1]
105
- assert_equal ['[[Eastern Sports Club|Eastern]]', '[[Mong Kok]]'], table[2]
106
- assert_equal ['[[HKFC Soccer Section]]', '[[Happy Valley, Hong Kong|Happy Valley]]'], table[3]
107
- end
108
-
109
- end # class TestTableReader