mediawiki_robot 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ .bundle
+ Gemfile.lock
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source "http://rubygems.org"
+
+ # Specify your gem's dependencies in mediawiki_robot.gemspec
+ gemspec
data/Rakefile ADDED
@@ -0,0 +1,2 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
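The Rakefile above pulls in Bundler's gem helper: Bundler::GemHelper.install_tasks defines the standard rake build, rake install, and rake release tasks for packaging the gem and pushing it to RubyGems.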
data/lib/mediawiki_robot.rb ADDED
@@ -0,0 +1,6 @@
+ require 'mediawiki_robot/enhanced_gateway'
+ require 'mediawiki_robot/robot'
+
+ module MediawikiRobot
+
+ end
data/lib/mediawiki_robot/enhanced_gateway.rb ADDED
@@ -0,0 +1,121 @@
+ #!/usr/bin/env ruby
+
+ require 'media_wiki'
+ require 'media_wiki/gateway'
+ require 'media_wiki/config'
+
+ module MediawikiRobot
+
+   class EnhancedGateway < MediaWiki::Gateway
+
+     # overrides the usual 'login' to provide retries upon timeout
+     def login(user, pass)
+       done = false
+       while not done
+         begin
+           super(user, pass)
+           done = true
+         rescue SocketError
+           puts "MediaWiki::Gateway::login -- caught SocketError, retrying..."
+         rescue Errno::ETIMEDOUT
+           puts "MediaWiki::Gateway::login -- caught Errno::ETIMEDOUT, retrying..."
+         #rescue MediaWiki::Exception
+         # puts "MediaWiki::Gateway::login -- caught MediaWiki::Exception, retrying..."
+         end
+       end
+     end
+
+     def make_api_request_with_retry(form_data)
+       res = nil
+       while res.nil?
+         begin
+           res = make_api_request(form_data)
+         rescue SocketError
+           puts "MediaWiki::Gateway::make_api_request_with_retry -- caught SocketError, retrying..."
+         rescue Errno::ETIMEDOUT
+           puts "MediaWiki::Gateway::make_api_request_with_retry -- caught Errno::ETIMEDOUT, retrying..."
+         rescue MediaWiki::Exception
+           puts "MediaWiki::Gateway::make_api_request_with_retry -- caught MediaWiki::Exception, retrying..."
+         end
+       end
+
+       return res
+     end
+
+     def get_with_retry(title)
+       res = nil
+       while res.nil?
+         begin
+           res = get(title)
+         rescue SocketError
+           puts "MediaWiki::Gateway::get_with_retry -- caught SocketError, retrying..."
+         rescue Errno::ETIMEDOUT
+           puts "MediaWiki::Gateway::get_with_retry -- caught Errno::ETIMEDOUT, retrying..."
+         rescue MediaWiki::Exception
+           puts "MediaWiki::Gateway::get_with_retry -- caught MediaWiki::Exception, retrying..."
+         end
+       end
+
+       return res
+     end
+
+     def recent_changes(num_changes, end_time)
+       form_data =
+         {'action' => 'query',
+          'list' => 'recentchanges'}
+       form_data['rclimit'] = num_changes if !num_changes.nil?
+       form_data['rcend'] = end_time.strftime("%Y%m%d%H%M%S") if !end_time.nil?
+
+       res = make_api_request_with_retry(form_data)
+
+       changes = REXML::XPath.match(res, "//rc").map { |x| { :type => x.attributes["type"],
+                                                             :title => x.attributes["title"],
+                                                             :timestamp => x.attributes["timestamp"],
+                                                             :revision_id => x.attributes["revid"] } }
+       return changes
+     end
+
+     def get_all_pages_in_category(category_title) # e.g., 'Category:Articles_with_hCards'
+       form_data =
+         {'action' => 'query',
+          'list' => 'categorymembers',
+          'cmtitle' => category_title,
+          'cmlimit' => '5000'}
+
+       res = make_api_request_with_retry(form_data)
+
+       titles = REXML::XPath.match(res, "//cm").map { |x| x.attributes["title"] }
+       return titles
+     end
+
+     def get_page_revision(title)
+       form_data =
+         {'action' => 'query',
+          'titles' => title,
+          'prop' => 'revisions'}
+
+       res = make_api_request_with_retry(form_data)
+
+       rev_ids = REXML::XPath.match(res, "////rev").map { |x| x.attributes["revid"] }
+       return rev_ids[0]
+     end
+
+     def exists(page_title)
+       # used to be:
+       #   return ! get_with_retry(page_title).nil?
+       # but that was slower.
+
+       form_data =
+         {'action' => 'query',
+          'titles' => page_title,
+          'prop' => 'revisions'}
+
+       res = make_api_request_with_retry(form_data)
+
+       rev_ids = REXML::XPath.match(res, "////rev").map { |x| x.attributes["revid"] }
+       return !( rev_ids.nil? or rev_ids.empty? )
+     end
+
+   end
+
+ end
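A minimal usage sketch of EnhancedGateway, pieced together from the methods above and the calls exercised in the specs further down; the wiki URL and the robot credentials are placeholders, and login is inherited from MediaWiki::Gateway (overridden above to retry on network errors):

require 'mediawiki_robot/enhanced_gateway'

# Point the gateway at a wiki's api.php endpoint (URL and credentials are placeholders).
gateway = MediawikiRobot::EnhancedGateway.new('http://example.com/mediawiki/api.php',
                                              {:ignorewarnings => 1})
gateway.login('robot', 'robotpass')  # retries internally on SocketError / Errno::ETIMEDOUT

# Each recent change is a hash with :type, :title, :timestamp, and :revision_id keys.
changes = gateway.recent_changes(10, nil)
changes.each { |c| puts "#{c[:type]}: #{c[:title]} (rev #{c[:revision_id]})" }

# Category listing and existence checks wrap the MediaWiki query API with the same retry logic.
puts gateway.get_all_pages_in_category('Category:Articles_with_hCards').length
puts gateway.exists('Main_Page')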
data/lib/mediawiki_robot/robot.rb ADDED
@@ -0,0 +1,97 @@
+ #!/usr/bin/env ruby
+
+ require 'thread'
+ require 'time'
+ require 'mediawiki_robot/enhanced_gateway'
+
+ module MediawikiRobot
+
+   class Robot
+
+     attr_accessor :mw
+
+     API_SUFFIX = '/api.php'
+     MAX_SIMULTANEOUS_THREADS = 10
+
+     def initialize(mw_opts)
+       api_url = mw_opts[:base_url] + mw_opts[:normal_prefix] + API_SUFFIX
+       @mw = MediawikiRobot::EnhancedGateway.new(api_url, {:ignorewarnings=>1})
+
+       @thread = nil
+
+       @mainloop_sleep_secs = 5.0 # make this non-constant so that it can be overridden for testing
+     end
+
+     def start
+       @thread = Thread.new { main_loop }
+     end
+
+     def stop
+       @thread.kill
+     end
+
+     def is_running
+       return false if @thread.nil?
+       return true unless @thread.status.nil? or @thread.status == false
+       return false
+     end
+
+     private
+
+     def handle_single_change(change)
+       raise "not implemented"
+     end
+
+     def handle_changes(changes)
+
+       while not changes.empty?
+
+         # spin up a bunch of threads to pull down these batches of changes in parallel
+         threads = []
+         for i in 1..MAX_SIMULTANEOUS_THREADS do
+           if not changes.empty?
+
+             threads << Thread.new(changes.shift) do |cur_change|
+               handle_single_change(cur_change)
+             end
+
+           end
+         end
+         threads.each { |aThread| aThread.join }
+
+       end
+
+     end
+
+     def main_loop
+
+       # find the timestamp of the first change. We only want NEW changes
+       num_recent_changes = 500
+       prev_time = nil
+       changes = @mw.recent_changes(num_recent_changes, prev_time)
+
+       if !changes.nil? and !changes.empty?
+         timestamp_of_first_change = changes[0][:timestamp].gsub(/[-T:]/, ' ').gsub(/Z/,'')
+         prev_time = Time.strptime(timestamp_of_first_change, "%Y %m %d %H %M %S")
+         prev_time = prev_time + 1 # move 1 sec past the last change
+       end
+
+       while true
+
+         changes = @mw.recent_changes(num_recent_changes, prev_time)
+
+         if !changes.nil? and !changes.empty?
+           timestamp_of_first_change = changes[0][:timestamp].gsub(/[-T:]/, ' ').gsub(/Z/,'')
+           prev_time = Time.strptime(timestamp_of_first_change, "%Y %m %d %H %M %S")
+           prev_time = prev_time + 1 # move 1 sec past the last change
+
+           handle_changes(changes)
+         end
+
+         sleep @mainloop_sleep_secs
+       end
+     end
+
+   end
+
+ end
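To build a bot with this framework, one subclasses MediawikiRobot::Robot and supplies handle_single_change, which the base class deliberately leaves as raise "not implemented"; the RobotWithTestHarness class in the specs below uses the same pattern. A minimal sketch, with placeholder wiki options and credentials:

require 'mediawiki_robot'

# Hypothetical subclass: just prints each change the main loop hands it.
class LoggingRobot < MediawikiRobot::Robot
  def handle_single_change(change)
    puts "#{change[:timestamp]} #{change[:type]} #{change[:title]}"
  end
end

robot = LoggingRobot.new(:base_url => 'http://example.com',
                         :normal_prefix => '/mediawiki',
                         :special_prefix => '/mediawiki')
robot.mw.login('robot', 'robotpass')  # placeholder credentials
robot.start                           # polls recent_changes in a background thread
sleep 60
robot.stop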
data/lib/mediawiki_robot/version.rb ADDED
@@ -0,0 +1,3 @@
+ module MediawikiRobot
+   VERSION = "0.0.1"
+ end
data/mediawiki_robot.gemspec ADDED
@@ -0,0 +1,24 @@
+ # -*- encoding: utf-8 -*-
+ $:.push File.expand_path("../lib", __FILE__)
+ require "mediawiki_robot/version"
+
+ Gem::Specification.new do |s|
+   s.name        = "mediawiki_robot"
+   s.version     = MediawikiRobot::VERSION
+   s.platform    = Gem::Platform::RUBY
+   s.authors     = ["Jim Lindstrom"]
+   s.email       = ["jim.lindstrom@gmail.com"]
+   s.homepage    = ""
+   s.summary     = %q{MediaWiki robot framework}
+   s.description = %q{Gem for creating robots that monitor/maintain MediaWiki-based wikis.}
+
+   s.rubyforge_project = "mediawiki_robot"
+
+   s.files         = `git ls-files`.split("\n")
+   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+   s.require_paths = ["lib"]
+
+   s.add_dependency 'mediawiki-gateway'
+   s.add_dependency 'active_support'
+ end
data/specs/mediawiki_robot_enhanced_gateway_spec.rb ADDED
@@ -0,0 +1,139 @@
+ # mediawiki_enhanced_gateway_spec.rb
+
+ require 'rubygems'
+ require 'mediawiki_robot/enhanced_gateway'
+
+ DO_MESSY_TESTS = false # These tests muck with the 'recent_changes' list and so I try to minimize them
+
+ API_SUFFIX = '/api.php'
+
+ def rand_alphanumeric_str(len)
+   o = [('a'..'z'),('A'..'Z'),('0'..'9')].map{|i| i.to_a}.flatten;
+   (0..len).map{ o[rand(o.length)] }.join;
+ end
+
+ describe MediawikiRobot::EnhancedGateway do
+
+   before(:each) do
+
+     @mw_opts =
+       {:base_url => 'http://jimlindstrom.com',
+        :normal_prefix => '/mediawiki',
+        :special_prefix => '/mediawiki'}
+     @robot_acct =
+       {:user => "robot",
+        :pass => "robotpass"}
+
+     api_url = @mw_opts[:base_url] + @mw_opts[:normal_prefix] + API_SUFFIX
+     @gateway = MediawikiRobot::EnhancedGateway.new(api_url, {:ignorewarnings=>1})
+
+     @bogus_user = rand_alphanumeric_str(10)
+     @bogus_password = rand_alphanumeric_str(10)
+
+     @nonexistant_page = rand_alphanumeric_str(20)
+     @existing_page = "Main_Page"
+
+     @existing_category = "Category:Wikipedia_protected_templates"
+     @page_in_existing_category = "Template:Navbox"
+   end
+
+   describe "#login" do
+     it "throws MediaWiki::Unauthorized if bad password" do
+       lambda {
+         @gateway.login(@robot_acct[:user], @bogus_password)
+       }.should raise_error(MediaWiki::Unauthorized)
+     end
+
+     it "throws MediaWiki::Unauthorized if bad username" do
+       lambda {
+         @gateway.login(@bogus_user, @robot_acct[:pass])
+       }.should raise_error(MediaWiki::Unauthorized)
+     end
+
+     it "doesn't throw anything if good credentials" do
+       lambda {
+         @gateway.login(@robot_acct[:user], @robot_acct[:pass])
+       }.should_not raise_error(MediaWiki::Unauthorized)
+     end
+   end
+
+   describe "#exists" do
+     it "returns false if page does not exist" do
+       @gateway.exists(@nonexistant_page).should == false
+     end
+
+     it "returns true if page exists" do
+       if DO_MESSY_TESTS
+         @gateway.login(user,pass)
+         @gateway.exists(@nonexistant_page).should == false
+         @gateway.create(@nonexistant_page, "Testing page creation")
+         @gateway.exists(@nonexistant_page).should == true
+         @gateway.delete(@nonexistant_page)
+         @gateway.exists(@nonexistant_page).should == false
+       else
+         @gateway.exists(@existing_page).should == true
+       end
+     end
+   end
+
+   describe "#recent_changes" do
+     it "returns the requested number of items" do
+       pages = @gateway.recent_changes(50,nil).length.should == 50
+     end
+
+     it "returns no items if no edits since the start time" do
+       t = Time.new
+       starttime = t.getgm
+       pages = @gateway.recent_changes(nil,starttime).length.should == 0
+     end
+
+     it "returns the items edited since the start time" do
+       if DO_MESSY_TESTS
+         t = Time.new
+         starttime = t.getgm
+         @gateway.login(@robot_acct[:user], @robot_acct[:pass])
+         @gateway.create(@nonexistant_page, "Testing page creation")
+         @gateway.delete(@nonexistant_page)
+         pages = @gateway.recent_changes(nil,starttime).length.should >= 1
+       else
+         t = Time.gm(2010, 7, 8, 9, 10, 11)
+         starttime = t.getgm
+         @gateway.recent_changes(nil,starttime).length.should >= 1
+       end
+     end
+
+     it "returns a list of items that are hashes containing 'type', 'title', 'timestamp', and 'revision_id' keys" do
+       if DO_MESSY_TESTS
+         t = Time.new
+         starttime = t.getgm
+         @gateway.login(@robot_acct[:user], @robot_acct[:pass])
+         @gateway.create(@nonexistant_page, "Testing page creation")
+         @gateway.recent_changes(nil,starttime).should == [ { :type => "new", :title => @nonexistant_page } ] # FIXME: needs to take into account revision_id
+         @gateway.delete(@nonexistant_page)
+       else
+         @gateway.login(@robot_acct[:user], @robot_acct[:pass])
+         @gateway.recent_changes(10,nil)[0].keys.should == [:type, :title, :timestamp, :revision_id]
+       end
+     end
+   end
+
+   describe "#get" do
+     it "returns nil if doesn't exist" do
+       @gateway.get(@nonexistant_page).nil?.should == true
+     end
+     it "returns a String object" do
+       @gateway.get(@existing_page).class.should == String
+     end
+   end
+
+   describe "#get_all_pages_in_category" do
+     it "returns a list" do
+       @gateway.get_all_pages_in_category(@existing_category).class.should == Array
+     end
+     it "returns a list of all the person pages on the mediawiki" do
+       @gateway.get_all_pages_in_category(@existing_category).index(@page_in_existing_category).nil?.should == false
+       @gateway.get_all_pages_in_category(@existing_category).length.should > 1
+     end
+   end
+
+ end
data/specs/mediawiki_robot_robot_spec.rb ADDED
@@ -0,0 +1,109 @@
+ # mediawiki_robot_spec.rb
+
+ require 'rubygems'
+ require 'mediawiki_robot'
+
+ def wait_on_condition(max_retries, wait_time, condition)
+
+   retries = max_retries
+   while retries > 0 and condition.call==false do
+     sleep wait_time
+     retries -= 1
+   end
+
+ end
+
+ def rand_alphanumeric_str(len)
+   o = [('a'..'z'),('A'..'Z'),('0'..'9')].map{|i| i.to_a}.flatten;
+   (0..len).map{ o[rand(o.length)] }.join;
+ end
+
+ class RobotWithTestHarness < MediawikiRobot::Robot
+
+   def initialize(mw_opts)
+     super(mw_opts)
+
+     @last_change = nil
+     @mainloop_sleep_secs = 0.1
+   end
+
+   def handle_single_change(change)
+     @last_change = change
+   end
+
+   def get_last_change
+     return @last_change
+   end
+
+ end
+
+ describe MediawikiRobot::Robot do
+
+   before(:each) do
+
+     @mw_opts =
+       {:base_url => 'http://jimlindstrom.com',
+        :normal_prefix => '/mediawiki',
+        :special_prefix => '/mediawiki'}
+     @robot_acct =
+       {:user => "robot",
+        :pass => "robotpass"}
+
+     @robot = RobotWithTestHarness.new(@mw_opts)
+   end
+
+   describe "#start" do
+     it "starts the main loop" do
+       @robot.start
+
+       wait_on_condition(10, 0.1, lambda { @robot.is_running } )
+
+       @robot.is_running.should == true
+       @robot.stop
+     end
+
+     it "doesn't call 'handle_single_change' unless a page on the mediawiki is changed" do
+       @robot.start
+       @robot.mw.login(@robot_acct[:user], @robot_acct[:pass])
+
+       last_change = @robot.get_last_change
+       last_change.nil?.should == true
+
+       @robot.stop
+     end
+
+     it "calls 'handle_single_change' when a page on the mediawiki is changed" do
+       @robot.start
+       @robot.mw.login(@robot_acct[:user], @robot_acct[:pass])
+
+       rand_page = rand_alphanumeric_str(20)
+       @robot.mw.create(rand_page, "Testing robot functionality")
+
+       sleep 0.5
+
+       last_change = @robot.get_last_change
+       last_change.nil?.should == false
+       last_change.delete(:revision_id)
+       last_change.delete(:timestamp)
+       last_change.should == {:type=>"new", :title=>rand_page }
+
+       @robot.mw.delete(rand_page)
+
+       @robot.stop
+     end
+   end
+
+   describe "#stop" do
+     it "stops the main loop" do
+       @robot.start
+       sleep 0.1
+       @robot.stop
+
+       wait_on_condition(10, 0.1, lambda { !@robot.is_running } )
+
+       @robot.is_running.should == false
+     end
+   end
+
+ end
+
metadata ADDED
@@ -0,0 +1,79 @@
+ --- !ruby/object:Gem::Specification
+ name: mediawiki_robot
+ version: !ruby/object:Gem::Version
+   version: 0.0.1
+   prerelease:
+ platform: ruby
+ authors:
+ - Jim Lindstrom
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2011-06-14 00:00:00.000000000 -04:00
+ default_executable:
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: mediawiki-gateway
+   requirement: &79825670 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: *79825670
+ - !ruby/object:Gem::Dependency
+   name: active_support
+   requirement: &79825460 !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: *79825460
+ description: Gem for creating robots that monitor/maintain MediaWiki-based wikis.
+ email:
+ - jim.lindstrom@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - Rakefile
+ - lib/mediawiki_robot.rb
+ - lib/mediawiki_robot/enhanced_gateway.rb
+ - lib/mediawiki_robot/robot.rb
+ - lib/mediawiki_robot/version.rb
+ - mediawiki_robot.gemspec
+ - specs/mediawiki_robot_enhanced_gateway_spec.rb
+ - specs/mediawiki_robot_robot_spec.rb
+ has_rdoc: true
+ homepage: ''
+ licenses: []
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project: mediawiki_robot
+ rubygems_version: 1.6.2
+ signing_key:
+ specification_version: 3
+ summary: MediaWiki robot framework
+ test_files: []