wikiranger 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +5 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +26 -0
- data/LICENSE.txt +21 -0
- data/README.md +95 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/wikiranger +142 -0
- data/lib/wikiranger/thread_pool.rb +31 -0
- data/lib/wikiranger/util.rb +19 -0
- data/lib/wikiranger/version.rb +3 -0
- data/lib/wikiranger/wikipedia/user_contribution.rb +20 -0
- data/lib/wikiranger/wikipedia.rb +50 -0
- data/lib/wikiranger.rb +25 -0
- data/wikiranger.gemspec +30 -0
- metadata +132 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 52617871050938bbf1b5db6723a178e82ebffb0b
|
|
4
|
+
data.tar.gz: d1a3c7941a801cb21214b29d0c565fbfc86decfc
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 27942efad962fae9cef47deb31f3ee55833eaabf6ede6ef6c841add21f585179b499349734b11758b57997bd741289e69f1371456cfa2e0e11ab03e9c5e9fbc8
|
|
7
|
+
data.tar.gz: 0e51333984e97e4155671614e59da774a8fe5013faef746ece51ebdb911a8c1030633ef59e3297ff0869acbe24100c38ff437a06d081a53d8729d2d877cb743d
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
wikiranger (0.1.0)
|
|
5
|
+
colorize (~> 0.8.1)
|
|
6
|
+
ruby-progressbar (~> 1.9)
|
|
7
|
+
|
|
8
|
+
GEM
|
|
9
|
+
remote: https://rubygems.org/
|
|
10
|
+
specs:
|
|
11
|
+
colorize (0.8.1)
|
|
12
|
+
minitest (5.10.3)
|
|
13
|
+
rake (10.5.0)
|
|
14
|
+
ruby-progressbar (1.9.0)
|
|
15
|
+
|
|
16
|
+
PLATFORMS
|
|
17
|
+
ruby
|
|
18
|
+
|
|
19
|
+
DEPENDENCIES
|
|
20
|
+
bundler (~> 1.16)
|
|
21
|
+
minitest (~> 5.0)
|
|
22
|
+
rake (~> 10.0)
|
|
23
|
+
wikiranger!
|
|
24
|
+
|
|
25
|
+
BUNDLED WITH
|
|
26
|
+
1.16.0
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2018 Michael Henriksen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Wikiranger
|
|
2
|
+
|
|
3
|
+
When an anonymous contribution is made on [Wikipedia], the IP address is recorded as the author of the contribution. This simple tool can
|
|
4
|
+
collect information on Wiki contributions across an arbitrary number of CIDR ranges, and display a list of the top contributing IP addresses as well as a list of the most edited pages. Wikiranger can also write all contribution data to a CSV file for further analysis with other tools.
|
|
5
|
+
|
|
6
|
+
Wikiranger can be used to determine an organization's gateway IP address, or to get a better understanding of the topics that an organization is concerned about.
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
$ gem install wikiranger
|
|
11
|
+
|
|
12
|
+
## Usage
|
|
13
|
+
|
|
14
|
+
### Example:
|
|
15
|
+
|
|
16
|
+
Analysing Wiki activity from the Federal Communications Commission (FCC):
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
$ wikiranger --csv /tmp/fcc_contribs.csv 192.104.54.0/24 192.133.125.0/24
|
|
20
|
+
_ _ _
|
|
21
|
+
_ _ _|_| |_|_|___ ___ ___ ___ ___ ___
|
|
22
|
+
| | | | | '_| | _| .'| | . | -_| _|
|
|
23
|
+
|_____|_|_,_|_|_| |__,|_|_|_ |___|_| v0.1.0
|
|
24
|
+
by @michenriksen |___|
|
|
25
|
+
|
|
26
|
+
[+] Gathering Wiki contributions for 508 hosts: |===============================================================================================| 100%
|
|
27
|
+
[+] Gathered 437 Wiki contributions
|
|
28
|
+
[+] Wrote Wiki contribution data to /tmp/fcc_contribs.csv
|
|
29
|
+
|
|
30
|
+
TOP 10 CONTRIBUTORS:
|
|
31
|
+
192.104.54.21: 313 contributions (latest: Tue, 03 Aug 2010 20:55:32 +0000)
|
|
32
|
+
192.104.54.79: 49 contributions (latest: Tue, 25 Apr 2017 19:50:05 +0000)
|
|
33
|
+
192.104.54.179: 31 contributions (latest: Wed, 13 Sep 2017 17:38:40 +0000)
|
|
34
|
+
192.104.54.77: 23 contributions (latest: Thu, 03 Jan 2013 20:11:51 +0000)
|
|
35
|
+
192.104.54.166: 11 contributions (latest: Tue, 06 Dec 2011 14:26:39 +0000)
|
|
36
|
+
192.104.54.4: 9 contributions (latest: Fri, 10 Sep 2004 18:19:03 +0000)
|
|
37
|
+
192.104.54.1: 1 contributions (latest: Wed, 03 Mar 2004 19:59:59 +0000)
|
|
38
|
+
|
|
39
|
+
TOP 10 MOST EDITED PAGES:
|
|
40
|
+
Talk:Star Trek Into Darkness: 461KB (https://en.wikipedia.org/?curid=33786129)
|
|
41
|
+
Federal Communications Commission: 284KB (https://en.wikipedia.org/?curid=55974)
|
|
42
|
+
Aquarium: 241KB (https://en.wikipedia.org/?curid=19230351)
|
|
43
|
+
Henry James: 235KB (https://en.wikipedia.org/?curid=83117)
|
|
44
|
+
Hurricane Katrina: 187KB (https://en.wikipedia.org/?curid=2569378)
|
|
45
|
+
Edgar Cayce: 185KB (https://en.wikipedia.org/?curid=159184)
|
|
46
|
+
World of Warcraft Trading Card Game: 167KB (https://en.wikipedia.org/?curid=4053759)
|
|
47
|
+
Charlie Adam: 108KB (https://en.wikipedia.org/?curid=5232021)
|
|
48
|
+
Voice over IP: 107KB (https://en.wikipedia.org/?curid=75028)
|
|
49
|
+
Photography: 106KB (https://en.wikipedia.org/?curid=23604)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
The `/tmp/fcc_contribs.csv` file contains complete information on all contributions:
|
|
53
|
+
|
|
54
|
+
```csv
|
|
55
|
+
|
|
56
|
+
user,page_id,rev_id,parent_id,title,timestamp,comment,size
|
|
57
|
+
192.104.54.1,490292,4204587,2615510,Morgan Bible,2004-03-03T19:59:59Z,"",1944
|
|
58
|
+
192.104.54.4,23604,5842054,5787335,Photography,2004-09-10T18:19:03Z,/* History of photography */,18178
|
|
59
|
+
192.104.54.4,23604,5787335,5787318,Photography,2004-09-10T18:18:12Z,/* Obtaining Photography */,18169
|
|
60
|
+
192.104.54.4,23604,5787318,5787281,Photography,2004-09-10T18:16:16Z,/* Obtaining Photography */,18194
|
|
61
|
+
192.104.54.4,23604,5787281,5787047,Photography,2004-09-10T18:04:04Z,/* Obtaining Photography */,18169
|
|
62
|
+
192.104.54.4,23604,5787047,5786863,Photography,2004-09-10T17:55:22Z,/* History of photography */,18101
|
|
63
|
+
192.104.54.4,23604,5786863,5786846,Photography,2004-09-10T17:54:25Z,/* History of photography */,18096
|
|
64
|
+
192.104.54.4,19230351,5201932,4607534,Aquarium,2004-07-13T16:35:26Z,/* External links */,6074
|
|
65
|
+
192.104.54.4,19230351,4607534,4607480,Aquarium,2004-07-13T16:33:41Z,/* Aquarium Hobby */,5905
|
|
66
|
+
. . .
|
|
67
|
+
192.104.54.179,9673623,621253922,532212692,The Review of Litigation,2014-08-14T19:53:44Z,"",3017
|
|
68
|
+
192.104.54.179,27974327,612800294,590442127,Mignon Clyburn,2014-06-13T18:54:30Z,"",3625
|
|
69
|
+
192.104.54.179,92032,594287736,591586594,"Coos County, Oregon",2014-02-06T23:45:38Z,"The Coquille Indian Tribe still exists, is federally-recognized, and has Coos County designated as part of its Tribal ""service area"" pursuant to the Coquille Restoration Act.",11450
|
|
70
|
+
192.104.54.179,17917988,578732844,518057467,Windy Run,2013-10-25T19:41:52Z,/* Pronunciation and Origin of Name */,2288
|
|
71
|
+
192.104.54.179,20420662,569459681,569430538,Rickenbacker 4001,2013-08-20T19:09:21Z,"",6258
|
|
72
|
+
192.104.54.179,23495051,566849435,566849128,RabbitEars,2013-08-02T12:45:22Z,Fixed the journalist link.,28549
|
|
73
|
+
192.104.54.179,23495051,566849128,530941466,RabbitEars,2013-08-02T12:42:43Z,Added new article.,28536
|
|
74
|
+
192.104.54.179,3869822,564780654,555162817,WDTV,2013-07-18T12:17:47Z,"",12739
|
|
75
|
+
192.104.54.179,5787700,561537025,548727677,Allen & Company,2013-06-25T16:19:57Z,"",5280
|
|
76
|
+
192.104.54.179,15516,555105308,555032370,Intelsat,2013-05-14T20:05:36Z,"/* History */ Minior edit, ""of"" -> ""over""",26077
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
See `wikiranger --help` for more options.
|
|
80
|
+
|
|
81
|
+
## Development
|
|
82
|
+
|
|
83
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
84
|
+
|
|
85
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
86
|
+
|
|
87
|
+
## Contributing
|
|
88
|
+
|
|
89
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/michenriksen/wikiranger.
|
|
90
|
+
|
|
91
|
+
## License
|
|
92
|
+
|
|
93
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
|
94
|
+
|
|
95
|
+
[Wikipedia]: https://www.wikipedia.org/
|
data/Rakefile
ADDED
data/bin/console
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: loads the bundle and the gem, then opens an IRB
# session so the library can be explored interactively.
require "bundler/setup"
require "wikiranger"

# Fixtures or other setup code can be placed here before the console starts.
#
# To use Pry instead of IRB, add pry to the Gemfile and swap the two lines
# at the bottom for:
#
#   require "pry"
#   Pry.start

require "irb"
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/wikiranger
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line entry point for wikiranger.
#
# Expands the CIDR ranges given as arguments into host addresses, fetches the
# Wiki contributions of every address through a thread pool, optionally writes
# them to a CSV file, and prints the top contributors and most edited pages.

require "wikiranger"

# Defaults; each value can be overridden by the switches parsed below.
options = {
  :csv          => nil,
  :threads      => 5,
  :top          => 10,
  :api_base_url => Wikiranger::Wikipedia::DEFAULT_API_BASE_URI
}

begin
  OptionParser.new do |opts|
    opts.banner = "Usage: #{$0} [options] cidr [cidr2] ... [cidrN]"

    opts.on("--csv DESTINATION", "Write Wiki contribution data to CSV file") do |v|
      # Never overwrite an existing file. File.exist? is used because the
      # File.exists? alias was removed in Ruby 3.2.
      if File.exist?(v)
        puts "ERROR: File #{v} does already exist".red
        exit 1
      end
      options[:csv] = v
    end

    opts.on("-t", "--threads THREADS", "Amount of threads to use (default: #{options[:threads]})") do |v|
      # Non-numeric or non-positive input falls back to a single thread.
      v = v.to_i
      v = 1 if v <= 0
      options[:threads] = v
    end

    opts.on("--top NUMBER", "Number to show from top lists (default: #{options[:top]})") do |v|
      # Non-numeric or non-positive input keeps the current value.
      v = v.to_i
      v = options[:top] if v <= 0
      options[:top] = v
    end

    opts.on("--api-base-url URL", "Set base URL for Wiki API calls (default: #{options[:api_base_url]})") do |v|
      options[:api_base_url] = v
    end

    opts.on_tail("-h", "--help", "Show this message") do
      puts opts
      exit
    end

    opts.on_tail("--version", "Show version") do
      puts Wikiranger::VERSION
      exit
    end
  end.parse!
rescue OptionParser::ParseError => e
  # ParseError is the common ancestor of InvalidOption, MissingArgument and
  # InvalidArgument, so every malformed command line gets the friendly error
  # instead of a backtrace.
  puts "ERROR: #{e.message}".red
  puts "See -h or --help for usage"
  exit 1
end

# After parse! only the positional arguments (the CIDR ranges) remain in ARGV.
if ARGV.empty?
  puts "ERROR: No CIDR ranges given".red
  puts "See -h or --help for usage"
  exit 1
end

puts "#{Wikiranger::BANNER}".bold

begin
  # Overlapping ranges may yield the same address twice; query each one once.
  hosts = ARGV.flat_map { |cidr| Wikiranger::Util.expand_cidr(cidr) }.uniq
rescue IPAddr::Error => e
  # Raised by IPAddr for malformed addresses or prefixes.
  puts "ERROR: #{e.message}".red
  puts "See -h or --help for usage"
  exit 1
end

contributions = []
pool = Wikiranger::ThreadPool.new(options[:threads])
mutex = Mutex.new
wiki = Wikiranger::Wikipedia.new(:api_base_uri => options[:api_base_url])
progress = ProgressBar.create(
  :title  => "[+] Gathering Wiki contributions for #{hosts.count} hosts",
  :total  => hosts.count,
  :format => "%t: |%B| %p%%"
)

hosts.each do |host|
  pool.schedule do
    begin
      contribs = wiki.user_contributions(host)
      # The result array is shared between all worker threads.
      mutex.synchronize { contributions.concat(contribs) }
    rescue => e
      # A failed lookup is reported but must not abort the remaining hosts.
      progress.log("[-] ERROR: #{e.class}: #{e.message}")
    ensure
      # Count the host as processed even when its lookup failed, so the bar
      # still reaches 100%.
      progress.increment
    end
  end
end

# Blocks until every scheduled lookup has finished.
pool.shutdown
puts "[+] Gathered #{contributions.count} Wiki contributions"

if contributions.empty?
  puts "[+] Nothing more to do"
  exit
end

if options[:csv]
  CSV.open(options[:csv], "wb") do |csv|
    csv << ["user", "page_id", "rev_id", "parent_id", "title", "timestamp", "comment", "size"]
    contributions.each do |contribution|
      csv << contribution.to_csv_array
    end
  end
  puts "[+] Wrote Wiki contribution data to #{options[:csv].bold}"
end

# user    => [number of contributions, timestamp of the latest contribution]
top_contributors = {}
# page id => [sum of the contributions' size values, page title]
top_pages = {}

contributions.each do |contribution|
  if top_contributors.key?(contribution.user)
    count, latest = top_contributors[contribution.user]
    latest = contribution.timestamp_object if contribution.timestamp_object > latest
    top_contributors[contribution.user] = [count + 1, latest]
  else
    top_contributors[contribution.user] = [1, contribution.timestamp_object]
  end

  if top_pages.key?(contribution.pageid)
    bytes, title = top_pages[contribution.pageid]
    top_pages[contribution.pageid] = [bytes + contribution.size, title]
  else
    top_pages[contribution.pageid] = [contribution.size, contribution.title]
  end
end

puts "\nTOP #{options[:top]} CONTRIBUTORS:".bold
top_contributors.sort_by { |_user, stats| stats.first }.reverse.take(options[:top]).each do |user, stats|
  # A dedicated local is used for the count so the outer `contributions`
  # array is not overwritten.
  count, latest = stats
  puts "#{user.bold}: #{count} contributions (latest: #{latest.strftime('%a, %d %b %Y %H:%M:%S %z')})"
end

puts "\nTOP #{options[:top]} MOST EDITED PAGES:".bold
top_pages.sort_by { |_page_id, stats| stats.first }.reverse.take(options[:top]).each do |page_id, stats|
  contribution_bytes, title = stats
  # NOTE(review): page links use the fixed en.wikipedia.org base URL even when
  # --api-base-url points at another wiki -- confirm whether that is intended.
  page_url = "#{Wikiranger::Wikipedia::UserContribution::PAGE_BASE_URL}#{page_id}"
  puts "#{title.bold}: #{Wikiranger::Util.number_to_human_size(contribution_bytes)} (#{page_url})"
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module Wikiranger
  # Fixed-size pool of worker threads that run jobs taken from a shared queue.
  #
  #   pool = Wikiranger::ThreadPool.new(5)
  #   pool.schedule(arg) { |a| do_work(a) }
  #   pool.shutdown # waits for all scheduled jobs to finish
  class ThreadPool
    attr_reader :size, :jobs, :pool

    # Starts the worker threads.
    #
    # size - number of workers; coerced with #to_i, so numeric strings are
    #        accepted as well.
    def initialize(size)
      @size = size.to_i
      @jobs = Queue.new
      # Use the coerced @size (not the raw argument) so that the number of
      # workers always matches the number of exit jobs queued by #shutdown.
      @pool = Array.new(@size) do
        Thread.new do
          # A job that throws :exit unwinds out of the loop and ends the worker.
          catch(:exit) do
            loop do
              # Queue#pop blocks until a job is available.
              job, args = @jobs.pop
              job.call(*args)
            end
          end
        end
      end
    end

    # Queues a block for execution on one of the workers.
    #
    # args  - arguments passed to the block when it is run.
    # block - the job itself.
    def schedule(*args, &block)
      @jobs << [block, args]
    end

    # Queues one exit job per worker behind the already scheduled jobs and
    # waits for all workers to terminate.
    #
    # Returns the Array of worker threads.
    def shutdown
      @size.times do
        schedule { throw :exit }
      end
      @pool.each(&:join)
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Wikiranger
  # Stateless helper functions used by the command-line tool.
  module Util
    # Unit suffixes ordered from largest to smallest; number_to_human_size
    # starts at the last entry ("B") and walks towards the front.
    HUMAN_PREFIXES = %w(TB GB MB KB B).freeze

    # Expands a CIDR range into the list of host addresses it contains.
    #
    # The first and last address of the range (network and broadcast address)
    # are left out. Ranges of one or two addresses, i.e. a bare IP address,
    # a /32 or a /31, have no such addresses to strip and are returned in
    # full; previously they produced an empty list.
    #
    # cidr - String such as "192.104.54.0/24" or "192.104.54.21".
    #
    # Returns an Array of address Strings.
    # Raises IPAddr::InvalidAddressError if cidr cannot be parsed.
    def self.expand_cidr(cidr)
      addresses = IPAddr.new(cidr).to_range.map(&:to_s)
      return addresses if addresses.length <= 2
      addresses[1..-2]
    end

    # Formats a byte count as a short human-readable string, e.g. "461KB".
    #
    # number - the number of bytes.
    #
    # Returns a String consisting of the scaled value and its unit suffix.
    def self.number_to_human_size(number)
      s = number.to_f
      i = HUMAN_PREFIXES.length - 1
      # Move to the next larger unit once the value exceeds half of it, so
      # e.g. 600 bytes is shown as "0.6KB".
      while s > 512 && i > 0
        i -= 1
        s /= 1024
      end
      # One decimal is only shown for small values with a notable fraction.
      ((s > 9 || s.modulo(1) < 0.1 ? "%d" : "%.1f") % s) + HUMAN_PREFIXES[i]
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
module Wikiranger
  class Wikipedia
    # One contribution record, wrapped in an OpenStruct so that its fields
    # can be read as attributes (user, pageid, revid, parentid, title,
    # timestamp, comment, size).
    class UserContribution < OpenStruct

      PAGE_BASE_URL = "https://en.wikipedia.org/?curid=".freeze

      # Field order of a CSV row produced by #to_csv_array.
      CSV_FIELDS = %i[user pageid revid parentid title timestamp comment size].freeze

      # Returns the page URL String built from PAGE_BASE_URL and the page id.
      def page_url
        "#{PAGE_BASE_URL}#{self[:pageid]}"
      end

      # Returns the timestamp field parsed into a DateTime. The parsed value
      # is cached after the first call.
      def timestamp_object
        return @timestamp_object if @timestamp_object
        @timestamp_object = DateTime.parse(self[:timestamp])
      end

      # Returns the field values as an Array, in CSV_FIELDS order.
      def to_csv_array
        CSV_FIELDS.map { |field| self[field] }
      end
    end
  end
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
module Wikiranger
  # Minimal HTTP client for the "usercontribs" list of a MediaWiki API.
  class Wikipedia
    attr_reader :options

    DEFAULT_API_BASE_URI = "https://en.wikipedia.org"

    # Base class for all errors raised by this client.
    class Error < StandardError; end
    # Raised when the API answers with something other than the expected
    # status code or body structure.
    class UnexpectedResponseError < Wikiranger::Wikipedia::Error; end

    # options - Hash of settings:
    #           :api_base_uri - base URI of the wiki to query
    #                           (default: DEFAULT_API_BASE_URI).
    def initialize(options = {})
      @options = options
    end

    # Fetches the contributions made by a user (for anonymous edits, the IP
    # address) with a single API request.
    #
    # user - the user name or IP address String.
    #
    # Returns an Array of Wikiranger::Wikipedia::UserContribution objects.
    # Raises UnexpectedResponseError if the response code is not 200 or the
    # body does not contain a contribution list.
    def user_contributions(user)
      # URI.escape was removed in Ruby 3.0; encode_www_form_component is the
      # supported way to escape a query string value.
      uri = "#{api_base_uri}/w/api.php?action=query&format=json&list=usercontribs&uclimit=max&ucuser=#{URI.encode_www_form_component(user)}&ucdir=older"
      response = request(uri)
      if response.code.to_i != 200
        unexpected_response!("Unexpected response code: #{response.code} when retrieving user contributions for #{user}")
      end

      parsed_body = JSON.parse(response.body)
      # A body without "query" => "usercontribs" is reported through the
      # client's own error class rather than a NoMethodError on nil.
      query = parsed_body.is_a?(Hash) ? parsed_body["query"] : nil
      entries = query.is_a?(Hash) ? query["usercontribs"] : nil
      unless entries.is_a?(Array)
        unexpected_response!("Unexpected response body when retrieving user contributions for #{user}")
      end

      entries.map do |contribution|
        Wikiranger::Wikipedia::UserContribution.new(contribution)
      end
    end

    private

    # Performs a GET request for the given URI String and returns the
    # Net::HTTP response object.
    def request(uri)
      uri = URI.parse(uri)
      http_object(uri).get(uri.request_uri)
    end

    # Builds a Net::HTTP object for the URI's host and port, with TLS enabled
    # for https URIs.
    def http_object(uri)
      Net::HTTP.new(uri.host, uri.port).tap do |h|
        h.use_ssl = (uri.scheme == "https")
      end
    end

    # Returns the configured API base URI, or the default when none was given.
    def api_base_uri
      options[:api_base_uri] || DEFAULT_API_BASE_URI
    end

    # Raises UnexpectedResponseError with the given message.
    def unexpected_response!(message)
      fail Wikiranger::Wikipedia::UnexpectedResponseError, message
    end
  end
end
|
data/lib/wikiranger.rb
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
require "ostruct"
|
|
2
|
+
require "uri"
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "json"
|
|
5
|
+
require "optparse"
|
|
6
|
+
require "ipaddr"
|
|
7
|
+
require "date"
|
|
8
|
+
require "csv"
|
|
9
|
+
|
|
10
|
+
require "colorize"
|
|
11
|
+
require "ruby-progressbar"
|
|
12
|
+
|
|
13
|
+
require "wikiranger/version"
|
|
14
|
+
require "wikiranger/util"
|
|
15
|
+
require "wikiranger/thread_pool"
|
|
16
|
+
require "wikiranger/wikipedia"
|
|
17
|
+
require "wikiranger/wikipedia/user_contribution"
|
|
18
|
+
|
|
19
|
+
module Wikiranger
  # ASCII-art banner printed by the command-line tool; includes the gem
  # version. The parentheses make #freeze apply to the whole concatenated
  # String instead of only to the last literal.
  BANNER = (
    " _ _ _\n" +
    " _ _ _|_| |_|_|___ ___ ___ ___ ___ ___\n" +
    "| | | | | '_| | _| .'| | . | -_| _|\n" +
    "|_____|_|_,_|_|_| |__,|_|_|_ |___|_| v#{Wikiranger::VERSION}\n" +
    " by @michenriksen |___|\n"
  ).freeze
end
|
data/wikiranger.gemspec
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
|
|
2
|
+
# Put the gem's lib directory on the load path so that the version file can
# be required below.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "wikiranger/version"

Gem::Specification.new do |spec|
  spec.name          = "wikiranger"
  spec.version       = Wikiranger::VERSION
  spec.authors       = ["Michael Henriksen"]
  spec.email         = ["michenriksen@neomailbox.ch"]

  spec.summary       = "Gather information on Wiki contributions from IP ranges"
  spec.description   = "Gather information on Wiki contributions from IP ranges"
  spec.homepage      = "https://github.com/michenriksen/wikiranger"
  spec.license       = "MIT"

  # Package every file tracked by git, except the test directories.
  tracked_files      = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  # Every file below exe/ is installed as an executable.
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency "ruby-progressbar", "~> 1.9"
  spec.add_dependency "colorize", "~> 0.8.1"

  # Development-only dependencies.
  spec.add_development_dependency "bundler", "~> 1.16"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
end
|
metadata
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: wikiranger
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Michael Henriksen
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2018-01-14 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: ruby-progressbar
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.9'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.9'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: colorize
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: 0.8.1
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: 0.8.1
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: bundler
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '1.16'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '1.16'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: rake
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '10.0'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '10.0'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: minitest
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '5.0'
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '5.0'
|
|
83
|
+
description: Gather information on Wiki contributions from IP ranges
|
|
84
|
+
email:
|
|
85
|
+
- michenriksen@neomailbox.ch
|
|
86
|
+
executables:
|
|
87
|
+
- wikiranger
|
|
88
|
+
extensions: []
|
|
89
|
+
extra_rdoc_files: []
|
|
90
|
+
files:
|
|
91
|
+
- ".gitignore"
|
|
92
|
+
- ".travis.yml"
|
|
93
|
+
- Gemfile
|
|
94
|
+
- Gemfile.lock
|
|
95
|
+
- LICENSE.txt
|
|
96
|
+
- README.md
|
|
97
|
+
- Rakefile
|
|
98
|
+
- bin/console
|
|
99
|
+
- bin/setup
|
|
100
|
+
- exe/wikiranger
|
|
101
|
+
- lib/wikiranger.rb
|
|
102
|
+
- lib/wikiranger/thread_pool.rb
|
|
103
|
+
- lib/wikiranger/util.rb
|
|
104
|
+
- lib/wikiranger/version.rb
|
|
105
|
+
- lib/wikiranger/wikipedia.rb
|
|
106
|
+
- lib/wikiranger/wikipedia/user_contribution.rb
|
|
107
|
+
- wikiranger.gemspec
|
|
108
|
+
homepage: https://github.com/michenriksen/wikiranger
|
|
109
|
+
licenses:
|
|
110
|
+
- MIT
|
|
111
|
+
metadata: {}
|
|
112
|
+
post_install_message:
|
|
113
|
+
rdoc_options: []
|
|
114
|
+
require_paths:
|
|
115
|
+
- lib
|
|
116
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
117
|
+
requirements:
|
|
118
|
+
- - ">="
|
|
119
|
+
- !ruby/object:Gem::Version
|
|
120
|
+
version: '0'
|
|
121
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
|
+
requirements:
|
|
123
|
+
- - ">="
|
|
124
|
+
- !ruby/object:Gem::Version
|
|
125
|
+
version: '0'
|
|
126
|
+
requirements: []
|
|
127
|
+
rubyforge_project:
|
|
128
|
+
rubygems_version: 2.6.14
|
|
129
|
+
signing_key:
|
|
130
|
+
specification_version: 4
|
|
131
|
+
summary: Gather information on Wiki contributions from IP ranges
|
|
132
|
+
test_files: []
|