wikian 0.1.12 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/wikian.rb +12 -1
- data/lib/wikian/contributions.rb +3 -3
- data/lib/wikian/get.rb +33 -36
- data/lib/wikian/monkeypatches.rb +1 -1
- data/lib/wikian/post.rb +62 -16
- data/lib/wikian/search.rb +2 -2
- data/lib/wikian/subcommand.rb +14 -15
- data/lib/wikian/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 34b650b2c286774ef79f660715de782f54cd5b86d7690f6055dd662cdda8e1d0
+  data.tar.gz: 4b11d22fc6556d696b75dcd1d0373573e02a8eda58a02d661090be8faf03ec8d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9688be1c774d25208822ccba31d0381f512d9fb161c829631b22d70a824d841ed78b70303a32161f3826649fc1ec34a1af819724a1c34d6c0471d87cdf2ef92f
+  data.tar.gz: 5d3cb888c767d07f3ead8c9dbc56d962042dcb1a719274dd9e28493e8e146ed5807e646af17cf08219737b6e85f3ebe76c60d48854d73f4e1fa0438d2685b490
data/Gemfile.lock
CHANGED
data/lib/wikian.rb
CHANGED
@@ -13,6 +13,7 @@ require 'fileutils'
 require 'json'
 require 'net/http'
 require 'open-uri'
+require 'tempfile'
 require 'yaml'
 
 class Wikian
@@ -43,6 +44,7 @@ class Wikian
       api = Wikian::Get.new(args)
       api.doit
       api.extract_wikitext
+      api.save_metadata
     elsif subcommand[0] == 's'
       api = Wikian::Search.new(args)
       api.doit
@@ -58,6 +60,15 @@ class Wikian
     puts "#{e.class} #{e.message} in #{__FILE__}"
   end
 
+  def self.meta_dir
+    '.wikian'
+  end
+
+  # file to store metadata of fetched articles
+  def self.meta_file
+    File.join(meta_dir, 'meta.yml')
+  end
+
   def help
     puts <<~eos
       Usage:
@@ -76,7 +87,7 @@ class Wikian
       -v, --version    print version number
 
      Subcommands:
-      c, contributions [N]    get user last N contributions
+      c, contributions [N]    get user last N contributions (defaults to #{Contributions::DEFAULT_MAX_CONTRIBUTIONS})
       g, get                  get wikitext from a Wikipedia article
       p, post                 post wikitext to a Wikipedia article
       s, search               search wikitext in Wikipedia
data/lib/wikian/contributions.rb
CHANGED
@@ -11,15 +11,15 @@ class Wikian
 
       max_contributions = args.find(&:numeric?) || DEFAULT_MAX_CONTRIBUTIONS
 
-      raise(BadUrlError, "Try passing the '-t' option") unless
+      raise(BadUrlError, "Try passing the '-t' option") unless config['meta']['site']
 
-      @output_file = 'User:' + ENV['WIKI_USER'] + '.contributions.' +
+      @output_file = 'User:' + ENV['WIKI_USER'] + '.contributions.' + config['meta']['site']
 
       @params.merge!('ucuser' => ENV['WIKI_USER'], 'uclimit' => max_contributions, 'format' => Wikian::RESPONSE_FORMAT)
 
       @query = @params.to_query
 
-      @api_url = URI("https://#{
+      @api_url = URI("https://#{config['meta']['site']}/w/api.php?#{query}")
     rescue => e
       puts "#{e.class} in #{__FILE__}. #{e.message}"
       exit
data/lib/wikian/get.rb
CHANGED
@@ -1,17 +1,16 @@
 class Wikian
   class WikianGetError < StandardError; end
-  class ExtractWikiError < WikianGetError; end
   class ArgumentRequiredError < WikianGetError; end
 
   class Get < Subcommand
-    attr_accessor :title
+    attr_accessor :title, :url, :latest_revision
 
     def initialize(args)
       raise ArgumentRequiredError if args.empty?
 
       super
 
-      url = URI(args.find{|arg| arg =~ URI.regexp})
+      @url = URI(args.find{|arg| arg =~ URI.regexp})
 
       raise BadUrlError unless url.path
 
@@ -33,46 +32,43 @@ class Wikian
     #
     # return: nil
     def extract_wikitext
-
-        raise ExtractWikiError, 'JSON response has no pages'
-      end
+      pages = JSON.parse(res_body).dig('query','pages')
 
-
-
-
-
-
-
-      File.open(wiki_file,'w') do |f|
-        content = revision.dig('slots', 'main', 'content') ||
-          revision.dig('slots', '*') ||
-          revision.dig('*')
-        STDERR.puts "Warning: nil 'content' in #{Wikian::CONFIG_FILE}" unless content
-        STDERR.puts "Writing to #{wiki_file}"
-        f.puts content
-      end
-      end
-      end
+      # Wikipedia is inconsistent in their value for 'pages', it's sometimes a hash, sometimes an array
+      @latest_revision = (pages.respond_to?(:keys) ? pages.values.first : pages.first)['revisions'].first
+
+      content = latest_revision.dig('slots', 'main', 'content') ||
+        latest_revision.dig('slots', '*') ||
+        latest_revision.dig('*')
 
-
-
-
-
-
-
-      end
+      wiki_file = File.basename(response_file, File.extname(response_file)) + '.wiki'
+
+      File.open(wiki_file,'w') do |f|
+        STDERR.puts "Warning: nil 'content' in #{Wikian::CONFIG_FILE}" unless content
+        STDERR.puts "Writing to #{wiki_file}"
+        f.puts content
       end
 
     rescue => e
-      puts "An error occurred while extracting the wikitext"
-        "Try using a new config file by pasing the '-t' option.",
-        "Or pass the '-d' option for debugging"
+      puts "An error occurred while extracting the wikitext"
       exit
     end
 
+    # save article metadata
+    #
+    # metadata like article timestamp is used to solve edit conflicts
+    def save_metadata
+      FileUtils.mkdir_p(Wikian.meta_dir)
+
+      metadata = File.exist?(Wikian.meta_file) ? YAML.load(File.open(Wikian.meta_file)) : {}
+      metadata['meta'].merge!(title => {'timestamp' => latest_revision['timestamp']})
+
+      File.write(Wikian.meta_file, YAML.dump(metadata))
+    end
+
     def template
       <<~eos
-        # for a list of parameters to use here see: https://www.mediawiki.org/wiki/API:
+        # for a list of parameters to use here see: https://www.mediawiki.org/wiki/API:Revisions
         meta:
           headers:
             user-agent: Wikian
@@ -83,9 +79,10 @@ class Wikian
             - revisions
           rvprop:
             - content
-
-
-
+            - timestamp
+          #rvsection: # get specific sections
+          #  - 0
+          #  - 2
           rvslots:
             - main
           formatversion:
data/lib/wikian/monkeypatches.rb
CHANGED
data/lib/wikian/post.rb
CHANGED
@@ -1,19 +1,24 @@
 class Wikian
   class WikianPostError < StandardError; end
   class WikiFileError < WikianPostError; end
+  class WikiFileNameError < WikianPostError; end
+  class WikiMergeError < WikianPostError; end
 
   class Post
     attr_accessor :args, :baseurl, :header, :input_file, :debug, :login_token,
-                  :login_cookie, :csrf_token, :csrf_cookie, :query, :body_text,
+                  :login_cookie, :csrf_token, :csrf_cookie, :query, :body_text,
+                  :username, :params, :latest_revision, :latest_content, :metadata
 
     def initialize(args)
       @args = args
 
+      long_to_short_options
+
       # input wikitext file
-      @input_file = args.find{|f| File.exist? f}
-      raise WikiFileError unless input_file
+      raise WikiFileError unless @input_file = args.find{|f| File.exist? f}
 
-      site = input_file.match(/\.(.*)\.wiki/)[1
+      site = input_file.match(/\.(.*)\.wiki/)&.[](1)
+      raise(WikiFileNameError, "Use the Input file name convention <article_name>.<site>.wiki") unless site
 
       @baseurl = "https://#{site}/w/api.php"
 
@@ -21,15 +26,20 @@ class Wikian
 
       @username = ENV['WIKI_USER']
 
-      @debug = (args & %w(-d
+      @debug = (args & %w(-d)).length > 0 ? true : false
     rescue => e
       puts "#{e.class} in #{__FILE__}. #{e.message}"
       exit
     end
 
+    # transform long options like '--message' to short options like '-m'
+    def long_to_short_options
+      args.map! {|opt| opt[0,2] == '--' ? opt[1,2] : opt}
+    end
+
     def post
       # remove expired cookie
-      if expired_cookie? || args.have?(%w(-r
+      if expired_cookie? || args.have?(%w(-r))
         FileUtils.rm_f(csrf_cookie_file)
       end
 
@@ -41,10 +51,17 @@ class Wikian
 
         get_csrf_cookie
       end
-      get_csrf_token
-
       build_query_string
 
+      get_latest_revision
+
+      if @body_text && Time.parse(params['starttimestamp']) < Time.parse(params['basetimestamp'])
+        puts "\e[31mEdit conflict detected, merging with latest version\e[m"
+        merge_versions
+      end
+
+      get_csrf_token
+
       upload_article
     end
 
@@ -84,9 +101,19 @@ class Wikian
       puts(res.body) if debug
     end
 
+    def get_latest_revision
+      res = URI.open("#{baseurl}?action=query&prop=revisions&titles=#{params['title']}&rvslots=main&rvprop=content|timestamp&format=json")
+      @latest_revision = JSON.parse(res.read).dig('query', 'pages').values.first.dig('revisions').first
+      params['basetimestamp'] = latest_revision['timestamp']
+      @latest_content = latest_revision.dig('slots', 'main', 'content') ||
+        latest_revision.dig('slots', 'main', '*') ||
+        latest_revision.dig('slots', '*') ||
+        latest_revision.dig('*')
+    end
+
     def get_csrf_token
       puts("\nGetting csrf token using csrf cookies") if debug
-      url = URI("#{baseurl}?action=query&meta=tokens&format=json&
+      url = URI("#{baseurl}?action=query&meta=tokens&format=json&prop=info|revisions&rvprop=timestamp")
       res = URI.open(url, header.merge('cookie' => csrf_cookie))
       json = JSON.parse(res.read)
       @csrf_token = json.dig('query','tokens','csrftoken')
@@ -94,32 +121,51 @@ class Wikian
     end
 
     def build_query_string
-      params={}
+      @params={}
       params['action'] = 'edit'
       params['format'] = Wikian::RESPONSE_FORMAT
       params['title'] = input_file.sub(/\..*/,'')
+      @metadata = File.exist?(Wikian.meta_file) ? YAML.load(File.open(Wikian.meta_file)) : {}
+      params['starttimestamp'] =
+        if timestamp = metadata.dig('meta', params['title'], 'timestamp')
+          timestamp
+        else
+          FileUtils.mkdir_p(Wikian.meta_dir)
+          metadata = {'meta' => {'title' => {'timestamp' => File.ctime(input_file).utc.iso8601}}}
+          File.write(Wikian.meta_file, YAML.dump(metadata))
+        end
       wikitext = File.read(input_file)
-      if args.have?(%w(-a
+      if args.have?(%w(-a))
         params['appendtext'] = wikitext
-      elsif args.have?(%w(-p
+      elsif args.have?(%w(-p))
         params['prependtext'] = wikitext
       else
         # pass the wikitext in request body
         @body_text = wikitext
       end
-      if args.have?(%w(-c
+      if args.have?(%w(-c))
         params['captchaid'], params['captchaword'] = args[args.index('-c')+1].split(':')
       end
-      if args.have?(%w(-m
+      if args.have?(%w(-m))
         params['summary'] = args[args.index('-m')+1]
       end
-      if args.have?(%w(-s
+      if args.have?(%w(-s))
         params['section'] = args[args.index('-s')+1]
       end
-
+    end
+
+    def merge_versions
+      tmp_local = Tempfile.open {|f| f.write @body_text; f}
+      tmp_latest = Tempfile.open {|f| f.write latest_content; f}
+      @body_text = %x(diff --line-format %L #{tmp_local.path} #{tmp_latest.path})
+      metadata['meta'].merge!(params['title'] => {'timestamp' => Time.now.utc.iso8601})
+    rescue => e
+      puts "WikiMergeError in #{__FILE__}"
+      exit
     end
 
     def upload_article
+      @query = URI.encode_www_form(params)
       puts("\nUploading the wiki article using csrf token #{csrf_token}") if debug
       url = URI("#{baseurl}?#{query}")
       req = Net::HTTP::Post.new(url, header.merge('cookie' => csrf_cookie, 'content-type' => 'application/x-www-form-urlencoded'))
data/lib/wikian/search.rb
CHANGED
@@ -6,13 +6,13 @@ class Wikian
     def initialize(args)
       super
 
-      @output_file =
+      @output_file = config['api']['srsearch'].first
 
       @params.merge!('format' => Wikian::RESPONSE_FORMAT)
 
       @query = @params.to_query
 
-      @api_url = URI("https://#{
+      @api_url = URI("https://#{config['meta']['site']}/w/api.php?#{query}")
     rescue => e
       puts "#{e.class} in #{__FILE__}. #{e.message}"
       exit
data/lib/wikian/subcommand.rb
CHANGED
@@ -1,12 +1,11 @@
-#!/usr/bin/env -S ruby -W0
 class Wikian
   class WikianSubcommandError < StandardError; end
-  class MissingConfigFileError < WikianSubcommandError; end
   class BadUrlError < WikianSubcommandError; end
 
   # class to be inherited by other Wikian classes
   class Subcommand
-    attr_accessor :args, :res, :
+    attr_accessor :args, :res, :config, :query, :title, :api_url,
+                  :debug, :output_file, :res_body
 
     def initialize(args)
       @args = args
@@ -19,11 +18,15 @@ class Wikian
 
       @debug = (args & %w(-d --debug)).length > 0 ? true : false
 
-
-
+      @config =
+        if File.exist?(Wikian::CONFIG_FILE)
+          YAML.load(File.open(Wikian::CONFIG_FILE))
+        else
+          YAML.load(template)
+        end
 
       # some params like 'titles' can contain multiple entries joined by '|'. More info in Wikipedia API docs
-      @params = Hash[
+      @params = Hash[config['api'].keys.zip(config['api'].values.map{|arr| arr.join("|")})]
     rescue MissingConfigFileError => e
       puts "#{e.class} try passing the '-t' option to generate #{Wikian::CONFIG_FILE} in #{__FILE__}"
       exit
@@ -42,27 +45,23 @@ class Wikian
 
     # HTTP response file name. Its extension depends on the 'content-type' header
     def response_file
-      output_file + '.' + res['content-type'].split('/').last.sub(/;.*/,'')
+      output_file + '.' + res.meta['content-type'].split('/').last.sub(/;.*/,'')
     end
 
     # write response in to `response_file`
     def write_response
       STDERR.puts "Writing to #{response_file}"
       File.open(response_file, 'w') do |f|
-        f.puts prettify(
+        f.puts prettify(res_body)
       end
     end
 
     def doit
       puts api_url if debug
 
-
+      @res=URI.open(api_url, config['meta']['headers'])
 
-
-
-      http.use_ssl = true
-
-      @res=http.request(req)
+      @res_body = res.read
 
       write_response
     rescue => e
@@ -74,7 +73,7 @@ class Wikian
 
     # if response is JSON prettify it, otherwise return it unchanged
     def prettify(str)
-      res['content-type'].match?('json') ? JSON.pretty_generate(JSON.parse(str)) : str
+      res.meta['content-type'].match?('json') ? JSON.pretty_generate(JSON.parse(str)) : str
     end
   end
 end
data/lib/wikian/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wikian
 version: !ruby/object:Gem::Version
-  version: 0.1.12
+  version: 0.2.0
 platform: ruby
 authors:
 - sergioro
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-09-
+date: 2020-09-15 00:00:00.000000000 Z
 dependencies: []
 description: Get and update Wikipedia articles
 email: