wikiscript 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -7,4 +7,5 @@ lib/wikiscript/client.rb
7
7
  lib/wikiscript/page.rb
8
8
  lib/wikiscript/version.rb
9
9
  test/helper.rb
10
- test/test_austria.rb
10
+ test/test_page.rb
11
+ test/test_page_de.rb
data/Rakefile CHANGED
@@ -18,7 +18,8 @@ Hoe.spec 'wikiscript' do
18
18
  self.history_file = 'HISTORY.md'
19
19
 
20
20
  self.extra_deps = [
21
- ['logutils' ]
21
+ ['logutils' ],
22
+ ['fetcher']
22
23
  ]
23
24
 
24
25
  self.licenses = ['Public Domain']
data/lib/wikiscript.rb CHANGED
@@ -1,17 +1,18 @@
1
+ # encoding: utf-8
2
+
1
3
  ## stdlibs
2
4
 
3
5
  require 'net/http'
4
6
  require 'uri'
5
- require 'json'
7
+ require 'cgi'
6
8
  require 'pp'
7
- require 'ostruct'
8
9
 
9
10
 
10
11
  ## 3rd party gems/libs
11
12
  ## require 'props'
12
13
 
13
14
  require 'logutils'
14
-
15
+ require 'fetcher'
15
16
 
16
17
  # our own code
17
18
 
@@ -29,7 +30,18 @@ module Wikiscript
29
30
  def self.root
30
31
  "#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}"
31
32
  end
32
-
33
+
34
+
35
+ ## for now make lang a global - change why? why not??
36
+ def self.lang=(value)
37
+ @@lang = value.to_s # use to_s - lets you pass in :en, :de etc.
38
+ end
39
+
40
+ def self.lang
41
+ # note: for now always returns a string e.g. 'en', 'de' etc. not a symbol
42
+ @@lang ||= 'en'
43
+ end
44
+
33
45
  end # module Wikiscript
34
46
 
35
47
 
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
 
2
3
  module Wikiscript
3
4
 
@@ -5,52 +6,65 @@ module Wikiscript
5
6
 
6
7
  include LogUtils::Logging
7
8
 
8
- SITE_BASE = 'http://en.wikipedia.org/w/index.php'
9
+ SITE_BASE = 'http://{lang}.wikipedia.org/w/index.php'
9
10
 
10
11
  ### API_BASE = 'http://en.wikipedia.org/w/api.php'
11
12
 
12
13
  def initialize( opts={} )
13
- @opts = opts
14
+ @opts = opts
15
+ @worker = Fetcher::Worker.new
14
16
  end
15
17
 
18
+ ## change to: wikitext why? why not? or to raw? why? why not?
16
19
  def text( title )
17
- ## todo/fix: urlencode title ???
18
- ## fix: use params hash!!!
19
- get( "action=raw&title=#{title}" )
20
+ ## todo/fix: convert spaces to _ if not present for wikipedia page title - why ?? why not ???
21
+ get( action: 'raw', title: title )
20
22
  end
21
23
 
22
24
  private
23
- ### fix: reuse code from fetcher gem!!!!
24
- ## do NOT duplicate!!! also cleanup jogo gem!!!!
25
+ def site_base
26
+ ## replace lang w/ lang config if present e.g.
27
+ ## http://{lang}.wikipedia.org/w/index.php
28
+ # becomes
29
+ # http://en.wikipedia.org/w/index.php or
30
+ # http://de.wikipedia.org/w/index.php etc
31
+
32
+ SITE_BASE.gsub( "{lang}", Wikiscript.lang )
33
+ end
34
+
35
+ def build_query( h )
36
+ h.map do |k,v|
37
+ "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
38
+ end.join( '&' )
39
+ end
25
40
 
26
41
  def get( params )
27
- uri = URI.parse( "#{SITE_BASE}?#{params}" )
28
-
29
-
30
- # new code: honor proxy env variable HTTP_PROXY
31
- proxy = ENV['HTTP_PROXY']
32
- proxy = ENV['http_proxy'] if proxy.nil? # try possible lower/case env variable (for *nix systems) is this necessary??
33
-
34
- if proxy
35
- proxy = URI.parse( proxy )
36
- logger.debug "using net http proxy: proxy.host=#{proxy.host}, proxy.port=#{proxy.port}"
37
- if proxy.user && proxy.password
38
- logger.debug " using credentials: proxy.user=#{proxy.user}, proxy.password=****"
39
- else
40
- logger.debug " using no credentials"
41
- end
42
- else
43
- logger.debug "using direct net http access; no proxy configured"
44
- proxy = OpenStruct.new # all fields return nil (e.g. proxy.host, etc.)
45
- end
42
+ # note: lets us pass in params as a hash e.g.
43
+ # action: 'raw', title: 'Austria'
44
+ # keys and values will get CGI escaped
45
+ query = build_query( params )
46
46
 
47
- http_proxy = Net::HTTP::Proxy( proxy.host, proxy.port, proxy.user, proxy.password )
47
+ ## uri = URI.parse( "#{SITE_BASE}?#{params}" )
48
+ ## fix: pass in uri (add to fetcher check for is_a? URI etc.)
49
+ uri_string = "#{site_base}?#{query}"
48
50
 
49
- http = http_proxy.new( uri.host, uri.port )
50
- response = http.request( Net::HTTP::Get.new( uri.request_uri ))
51
+ response = @worker.get_response( uri_string )
51
52
 
52
53
  if response.code == '200'
53
54
  t = response.body
55
+ ###
56
+ # NB: Net::HTTP will NOT set encoding UTF-8 etc.
57
+ # will mostly be ASCII
58
+ # - try to change encoding to UTF-8 ourselves
59
+ logger.debug "t.encoding.name (before): #{t.encoding.name}"
60
+ #####
61
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
62
+
63
+ ## NB:
64
+ # for now "hardcoded" to utf8 - what else can we do?
65
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
66
+ t = t.force_encoding( Encoding::UTF_8 )
67
+ logger.debug "t.encoding.name (after): #{t.encoding.name}"
54
68
  ## pp t
55
69
  t
56
70
  else
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
 
2
3
  module Wikiscript
3
4
 
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Wikiscript
3
- VERSION = '0.1.0'
3
+ VERSION = '0.1.1'
4
4
  end
5
5
 
data/test/test_page.rb ADDED
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'helper'
5
+
6
+
7
+ class TestPage < MiniTest::Unit::TestCase
8
+
9
+ def setup
10
+ Wikiscript.lang = :en
11
+ end
12
+
13
+ def test_austria_en
14
+ page = Wikiscript::Page.new( 'Austria' )
15
+ text = page.text
16
+
17
+ ## print first 600 chars
18
+ pp text[0..600]
19
+
20
+ ## check for some snippets
21
+ assert( /{{Infobox country/ =~ text )
22
+ assert( /common_name = Austria/ =~ text )
23
+ assert( /capital = \[\[Vienna\]\]/ =~ text )
24
+ assert( /The origins of modern-day Austria date back to the time/ =~ text )
25
+ end
26
+
27
+ def test_sankt_poelten_en
28
+ page = Wikiscript::Page.new( 'Sankt_Pölten' )
29
+ text = page.text
30
+
31
+ ## print first 600 chars
32
+ pp text[0..600]
33
+
34
+ ## check for some snippets
35
+ assert( /{{Infobox Town AT/ =~ text )
36
+ assert( /Name\s+=\s+Sankt Pölten/ =~ text )
37
+ assert( /'''Sankt Pölten''' \(''St. Pölten''\) is the capital city of/ =~ text )
38
+ end
39
+
40
+ end # class TestPage
41
+
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'helper'
5
+
6
+
7
+ class TestPageDe < MiniTest::Unit::TestCase
8
+
9
+ def setup
10
+ Wikiscript.lang = :de
11
+ end
12
+
13
+ def test_st_poelten_de
14
+ page = Wikiscript::Page.new( 'St._Pölten' )
15
+ text = page.text
16
+
17
+ ## print first 600 chars
18
+ pp text[0..600]
19
+
20
+ ## check for some snippets
21
+ assert( /{{Infobox Gemeinde in Österreich/ =~ text )
22
+ assert( /Name\s+=\s+St\. Pölten/ =~ text )
23
+ assert( /'''St\. Pölten''' \(amtlicher Name,/ =~ text )
24
+ assert( /Die Stadt liegt am Fluss \[\[Traisen \(Fluss\)\|Traisen\]\]/ =~ text )
25
+ end
26
+
27
+ end # class TestPageDe
28
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wikiscript
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-07-04 00:00:00.000000000 Z
12
+ date: 2014-07-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &73243800 !ruby/object:Gem::Requirement
16
+ requirement: &79133900 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,21 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *73243800
24
+ version_requirements: *79133900
25
+ - !ruby/object:Gem::Dependency
26
+ name: fetcher
27
+ requirement: &79133630 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *79133630
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: rdoc
27
- requirement: &73243290 !ruby/object:Gem::Requirement
38
+ requirement: &79133350 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: '4.0'
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *73243290
46
+ version_requirements: *79133350
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: hoe
38
- requirement: &73242780 !ruby/object:Gem::Requirement
49
+ requirement: &79133100 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,7 +54,7 @@ dependencies:
43
54
  version: '3.11'
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *73242780
57
+ version_requirements: *79133100
47
58
  description: wikiscript - scripts for wikipedia (get wikitext for page etc.)
48
59
  email: opensport@googlegroups.com
49
60
  executables: []
@@ -62,7 +73,8 @@ files:
62
73
  - lib/wikiscript/page.rb
63
74
  - lib/wikiscript/version.rb
64
75
  - test/helper.rb
65
- - test/test_austria.rb
76
+ - test/test_page.rb
77
+ - test/test_page_de.rb
66
78
  - .gemtest
67
79
  homepage: https://github.com/wikiscript/wikiscript.ruby
68
80
  licenses:
@@ -92,4 +104,5 @@ signing_key:
92
104
  specification_version: 3
93
105
  summary: wikiscript - scripts for wikipedia (get wikitext for page etc.)
94
106
  test_files:
95
- - test/test_austria.rb
107
+ - test/test_page_de.rb
108
+ - test/test_page.rb
data/test/test_austria.rb DELETED
@@ -1,24 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- require 'helper'
5
-
6
-
7
- class TestAustria < MiniTest::Unit::TestCase
8
-
9
- def test_text
10
- page = Wikiscript::Page.new( 'Austria' )
11
- text = page.text
12
-
13
- ## print first 600 chars
14
- pp text[0..600]
15
-
16
- ## check for some snippets
17
- assert( /{{Infobox country/ =~ text )
18
- assert( /common_name = Austria/ =~ text )
19
- assert( /capital = \[\[Vienna\]\]/ =~ text )
20
- assert( /The origins of modern-day Austria date back to the time/ =~ text )
21
- end
22
-
23
- end # class TestAustria
24
-