wikiscript 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -7,4 +7,5 @@ lib/wikiscript/client.rb
7
7
  lib/wikiscript/page.rb
8
8
  lib/wikiscript/version.rb
9
9
  test/helper.rb
10
- test/test_austria.rb
10
+ test/test_page.rb
11
+ test/test_page_de.rb
data/Rakefile CHANGED
@@ -18,7 +18,8 @@ Hoe.spec 'wikiscript' do
18
18
  self.history_file = 'HISTORY.md'
19
19
 
20
20
  self.extra_deps = [
21
- ['logutils' ]
21
+ ['logutils' ],
22
+ ['fetcher']
22
23
  ]
23
24
 
24
25
  self.licenses = ['Public Domain']
data/lib/wikiscript.rb CHANGED
@@ -1,17 +1,18 @@
1
+ # encoding: utf-8
2
+
1
3
  ## stdlibs
2
4
 
3
5
  require 'net/http'
4
6
  require 'uri'
5
- require 'json'
7
+ require 'cgi'
6
8
  require 'pp'
7
- require 'ostruct'
8
9
 
9
10
 
10
11
  ## 3rd party gems/libs
11
12
  ## require 'props'
12
13
 
13
14
  require 'logutils'
14
-
15
+ require 'fetcher'
15
16
 
16
17
  # our own code
17
18
 
@@ -29,7 +30,18 @@ module Wikiscript
29
30
  def self.root
30
31
  "#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}"
31
32
  end
32
-
33
+
34
+
35
+ ## for now make lang a global - change why? why not??
36
+ def self.lang=(value)
37
+ @@lang = value.to_s # use to_s - lets you pass in :en, :de etc.
38
+ end
39
+
40
+ def self.lang
41
+ # note: for now always returns a string e.g. 'en', 'de' etc. not a symbol
42
+ @@lang ||= 'en'
43
+ end
44
+
33
45
  end # module Wikiscript
34
46
 
35
47
 
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
 
2
3
  module Wikiscript
3
4
 
@@ -5,52 +6,65 @@ module Wikiscript
5
6
 
6
7
  include LogUtils::Logging
7
8
 
8
- SITE_BASE = 'http://en.wikipedia.org/w/index.php'
9
+ SITE_BASE = 'http://{lang}.wikipedia.org/w/index.php'
9
10
 
10
11
  ### API_BASE = 'http://en.wikipedia.org/w/api.php'
11
12
 
12
13
  def initialize( opts={} )
13
- @opts = opts
14
+ @opts = opts
15
+ @worker = Fetcher::Worker.new
14
16
  end
15
17
 
18
+ ## change to: wikitext why? why not? or to raw? why? why not?
16
19
  def text( title )
17
- ## todo/fix: urlencode title ???
18
- ## fix: use params hash!!!
19
- get( "action=raw&title=#{title}" )
20
+ ## todo/fix: convert spaces to _ if not present for wikipedia page title - why ?? why not ???
21
+ get( action: 'raw', title: title )
20
22
  end
21
23
 
22
24
  private
23
- ### fix: reuse code from fetcher gem!!!!
24
- ## do NOT duplicate!!! also cleanup jogo gem!!!!
25
+ def site_base
26
+ ## replace lang w/ lang config if present e.g.
27
+ ## http://{lang}.wikipedia.org/w/index.php
28
+ # becomes
29
+ # http://en.wikipedia.org/w/index.php or
30
+ # http://de.wikipedia.org/w/index.php etc
31
+
32
+ SITE_BASE.gsub( "{lang}", Wikiscript.lang )
33
+ end
34
+
35
+ def build_query( h )
36
+ h.map do |k,v|
37
+ "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}"
38
+ end.join( '&' )
39
+ end
25
40
 
26
41
  def get( params )
27
- uri = URI.parse( "#{SITE_BASE}?#{params}" )
28
-
29
-
30
- # new code: honor proxy env variable HTTP_PROXY
31
- proxy = ENV['HTTP_PROXY']
32
- proxy = ENV['http_proxy'] if proxy.nil? # try possible lower/case env variable (for *nix systems) is this necessary??
33
-
34
- if proxy
35
- proxy = URI.parse( proxy )
36
- logger.debug "using net http proxy: proxy.host=#{proxy.host}, proxy.port=#{proxy.port}"
37
- if proxy.user && proxy.password
38
- logger.debug " using credentials: proxy.user=#{proxy.user}, proxy.password=****"
39
- else
40
- logger.debug " using no credentials"
41
- end
42
- else
43
- logger.debug "using direct net http access; no proxy configured"
44
- proxy = OpenStruct.new # all fields return nil (e.g. proxy.host, etc.)
45
- end
42
+ # note: lets us pass in params as a hash e.g.
43
+ # action: 'raw', title: 'Austria'
44
+ # keys and values will get CGI escaped
45
+ query = build_query( params )
46
46
 
47
- http_proxy = Net::HTTP::Proxy( proxy.host, proxy.port, proxy.user, proxy.password )
47
+ ## uri = URI.parse( "#{SITE_BASE}?#{params}" )
48
+ ## fix: pass in uri (add to fetcher check for is_a? URI etc.)
49
+ uri_string = "#{site_base}?#{query}"
48
50
 
49
- http = http_proxy.new( uri.host, uri.port )
50
- response = http.request( Net::HTTP::Get.new( uri.request_uri ))
51
+ response = @worker.get_response( uri_string )
51
52
 
52
53
  if response.code == '200'
53
54
  t = response.body
55
+ ###
56
+ # NB: Net::HTTP will NOT set encoding UTF-8 etc.
57
+ # will mostly be ASCII
58
+ # - try to change encoding to UTF-8 ourselves
59
+ logger.debug "t.encoding.name (before): #{t.encoding.name}"
60
+ #####
61
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
62
+
63
+ ## NB:
64
+ # for now "hardcoded" to utf8 - what else can we do?
65
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
66
+ t = t.force_encoding( Encoding::UTF_8 )
67
+ logger.debug "t.encoding.name (after): #{t.encoding.name}"
54
68
  ## pp t
55
69
  t
56
70
  else
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
 
2
3
  module Wikiscript
3
4
 
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Wikiscript
3
- VERSION = '0.1.0'
3
+ VERSION = '0.1.1'
4
4
  end
5
5
 
data/test/test_page.rb ADDED
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'helper'
5
+
6
+
7
+ class TestPage < MiniTest::Unit::TestCase
8
+
9
+ def setup
10
+ Wikiscript.lang = :en
11
+ end
12
+
13
+ def test_austria_en
14
+ page = Wikiscript::Page.new( 'Austria' )
15
+ text = page.text
16
+
17
+ ## print first 600 chars
18
+ pp text[0..600]
19
+
20
+ ## check for some snippets
21
+ assert( /{{Infobox country/ =~ text )
22
+ assert( /common_name = Austria/ =~ text )
23
+ assert( /capital = \[\[Vienna\]\]/ =~ text )
24
+ assert( /The origins of modern-day Austria date back to the time/ =~ text )
25
+ end
26
+
27
+ def test_sankt_poelten_en
28
+ page = Wikiscript::Page.new( 'Sankt_Pölten' )
29
+ text = page.text
30
+
31
+ ## print first 600 chars
32
+ pp text[0..600]
33
+
34
+ ## check for some snippets
35
+ assert( /{{Infobox Town AT/ =~ text )
36
+ assert( /Name\s+=\s+Sankt Pölten/ =~ text )
37
+ assert( /'''Sankt Pölten''' \(''St. Pölten''\) is the capital city of/ =~ text )
38
+ end
39
+
40
+ end # class TestPage
41
+
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'helper'
5
+
6
+
7
+ class TestPageDe < MiniTest::Unit::TestCase
8
+
9
+ def setup
10
+ Wikiscript.lang = :de
11
+ end
12
+
13
+ def test_st_poelten_de
14
+ page = Wikiscript::Page.new( 'St._Pölten' )
15
+ text = page.text
16
+
17
+ ## print first 600 chars
18
+ pp text[0..600]
19
+
20
+ ## check for some snippets
21
+ assert( /{{Infobox Gemeinde in Österreich/ =~ text )
22
+ assert( /Name\s+=\s+St\. Pölten/ =~ text )
23
+ assert( /'''St\. Pölten''' \(amtlicher Name,/ =~ text )
24
+ assert( /Die Stadt liegt am Fluss \[\[Traisen \(Fluss\)\|Traisen\]\]/ =~ text )
25
+ end
26
+
27
+ end # class TestPageDe
28
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wikiscript
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-07-04 00:00:00.000000000 Z
12
+ date: 2014-07-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &73243800 !ruby/object:Gem::Requirement
16
+ requirement: &79133900 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,21 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *73243800
24
+ version_requirements: *79133900
25
+ - !ruby/object:Gem::Dependency
26
+ name: fetcher
27
+ requirement: &79133630 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *79133630
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: rdoc
27
- requirement: &73243290 !ruby/object:Gem::Requirement
38
+ requirement: &79133350 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: '4.0'
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *73243290
46
+ version_requirements: *79133350
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: hoe
38
- requirement: &73242780 !ruby/object:Gem::Requirement
49
+ requirement: &79133100 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,7 +54,7 @@ dependencies:
43
54
  version: '3.11'
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *73242780
57
+ version_requirements: *79133100
47
58
  description: wikiscript - scripts for wikipedia (get wikitext for page etc.)
48
59
  email: opensport@googlegroups.com
49
60
  executables: []
@@ -62,7 +73,8 @@ files:
62
73
  - lib/wikiscript/page.rb
63
74
  - lib/wikiscript/version.rb
64
75
  - test/helper.rb
65
- - test/test_austria.rb
76
+ - test/test_page.rb
77
+ - test/test_page_de.rb
66
78
  - .gemtest
67
79
  homepage: https://github.com/wikiscript/wikiscript.ruby
68
80
  licenses:
@@ -92,4 +104,5 @@ signing_key:
92
104
  specification_version: 3
93
105
  summary: wikiscript - scripts for wikipedia (get wikitext for page etc.)
94
106
  test_files:
95
- - test/test_austria.rb
107
+ - test/test_page_de.rb
108
+ - test/test_page.rb
data/test/test_austria.rb DELETED
@@ -1,24 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- require 'helper'
5
-
6
-
7
- class TestAustria < MiniTest::Unit::TestCase
8
-
9
- def test_text
10
- page = Wikiscript::Page.new( 'Austria' )
11
- text = page.text
12
-
13
- ## print first 600 chars
14
- pp text[0..600]
15
-
16
- ## check for some snippets
17
- assert( /{{Infobox country/ =~ text )
18
- assert( /common_name = Austria/ =~ text )
19
- assert( /capital = \[\[Vienna\]\]/ =~ text )
20
- assert( /The origins of modern-day Austria date back to the time/ =~ text )
21
- end
22
-
23
- end # class TestAustria
24
-