wikian 0.1.12 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/wikian.rb +12 -1
- data/lib/wikian/contributions.rb +3 -3
- data/lib/wikian/get.rb +33 -36
- data/lib/wikian/monkeypatches.rb +1 -1
- data/lib/wikian/post.rb +62 -16
- data/lib/wikian/search.rb +2 -2
- data/lib/wikian/subcommand.rb +14 -15
- data/lib/wikian/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 34b650b2c286774ef79f660715de782f54cd5b86d7690f6055dd662cdda8e1d0
|
4
|
+
data.tar.gz: 4b11d22fc6556d696b75dcd1d0373573e02a8eda58a02d661090be8faf03ec8d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9688be1c774d25208822ccba31d0381f512d9fb161c829631b22d70a824d841ed78b70303a32161f3826649fc1ec34a1af819724a1c34d6c0471d87cdf2ef92f
|
7
|
+
data.tar.gz: 5d3cb888c767d07f3ead8c9dbc56d962042dcb1a719274dd9e28493e8e146ed5807e646af17cf08219737b6e85f3ebe76c60d48854d73f4e1fa0438d2685b490
|
data/Gemfile.lock
CHANGED
data/lib/wikian.rb
CHANGED
@@ -13,6 +13,7 @@ require 'fileutils'
|
|
13
13
|
require 'json'
|
14
14
|
require 'net/http'
|
15
15
|
require 'open-uri'
|
16
|
+
require 'tempfile'
|
16
17
|
require 'yaml'
|
17
18
|
|
18
19
|
class Wikian
|
@@ -43,6 +44,7 @@ class Wikian
|
|
43
44
|
api = Wikian::Get.new(args)
|
44
45
|
api.doit
|
45
46
|
api.extract_wikitext
|
47
|
+
api.save_metadata
|
46
48
|
elsif subcommand[0] == 's'
|
47
49
|
api = Wikian::Search.new(args)
|
48
50
|
api.doit
|
@@ -58,6 +60,15 @@ class Wikian
|
|
58
60
|
puts "#{e.class} #{e.message} in #{__FILE__}"
|
59
61
|
end
|
60
62
|
|
63
|
+
def self.meta_dir
|
64
|
+
'.wikian'
|
65
|
+
end
|
66
|
+
|
67
|
+
# file to store metadata of fetched articles
|
68
|
+
def self.meta_file
|
69
|
+
File.join(meta_dir, 'meta.yml')
|
70
|
+
end
|
71
|
+
|
61
72
|
def help
|
62
73
|
puts <<~eos
|
63
74
|
Usage:
|
@@ -76,7 +87,7 @@ class Wikian
|
|
76
87
|
-v, --version print version number
|
77
88
|
|
78
89
|
Subcommands:
|
79
|
-
c, contributions [N] get user last N contributions
|
90
|
+
c, contributions [N] get user last N contributions (defaults to #{Contributions::DEFAULT_MAX_CONTRIBUTIONS})
|
80
91
|
g, get get wikitext from a Wikipedia article
|
81
92
|
p, post post wikitext to a Wikipedia article
|
82
93
|
s, search search wikitext in Wikipedia
|
data/lib/wikian/contributions.rb
CHANGED
@@ -11,15 +11,15 @@ class Wikian
|
|
11
11
|
|
12
12
|
max_contributions = args.find(&:numeric?) || DEFAULT_MAX_CONTRIBUTIONS
|
13
13
|
|
14
|
-
raise(BadUrlError, "Try passing the '-t' option") unless
|
14
|
+
raise(BadUrlError, "Try passing the '-t' option") unless config['meta']['site']
|
15
15
|
|
16
|
-
@output_file = 'User:' + ENV['WIKI_USER'] + '.contributions.' +
|
16
|
+
@output_file = 'User:' + ENV['WIKI_USER'] + '.contributions.' + config['meta']['site']
|
17
17
|
|
18
18
|
@params.merge!('ucuser' => ENV['WIKI_USER'], 'uclimit' => max_contributions, 'format' => Wikian::RESPONSE_FORMAT)
|
19
19
|
|
20
20
|
@query = @params.to_query
|
21
21
|
|
22
|
-
@api_url = URI("https://#{
|
22
|
+
@api_url = URI("https://#{config['meta']['site']}/w/api.php?#{query}")
|
23
23
|
rescue => e
|
24
24
|
puts "#{e.class} in #{__FILE__}. #{e.message}"
|
25
25
|
exit
|
data/lib/wikian/get.rb
CHANGED
@@ -1,17 +1,16 @@
|
|
1
1
|
class Wikian
|
2
2
|
class WikianGetError < StandardError; end
|
3
|
-
class ExtractWikiError < WikianGetError; end
|
4
3
|
class ArgumentRequiredError < WikianGetError; end
|
5
4
|
|
6
5
|
class Get < Subcommand
|
7
|
-
attr_accessor :title
|
6
|
+
attr_accessor :title, :url, :latest_revision
|
8
7
|
|
9
8
|
def initialize(args)
|
10
9
|
raise ArgumentRequiredError if args.empty?
|
11
10
|
|
12
11
|
super
|
13
12
|
|
14
|
-
url = URI(args.find{|arg| arg =~ URI.regexp})
|
13
|
+
@url = URI(args.find{|arg| arg =~ URI.regexp})
|
15
14
|
|
16
15
|
raise BadUrlError unless url.path
|
17
16
|
|
@@ -33,46 +32,43 @@ class Wikian
|
|
33
32
|
#
|
34
33
|
# return: nil
|
35
34
|
def extract_wikitext
|
36
|
-
|
37
|
-
raise ExtractWikiError, 'JSON response has no pages'
|
38
|
-
end
|
35
|
+
pages = JSON.parse(res_body).dig('query','pages')
|
39
36
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
File.open(wiki_file,'w') do |f|
|
47
|
-
content = revision.dig('slots', 'main', 'content') ||
|
48
|
-
revision.dig('slots', '*') ||
|
49
|
-
revision.dig('*')
|
50
|
-
STDERR.puts "Warning: nil 'content' in #{Wikian::CONFIG_FILE}" unless content
|
51
|
-
STDERR.puts "Writing to #{wiki_file}"
|
52
|
-
f.puts content
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
37
|
+
# Wikipedia is inconsistent in their value for 'pages', it's sometimes a hash, sometimes an array
|
38
|
+
@latest_revision = (pages.respond_to?(:keys) ? pages.values.first : pages.first)['revisions'].first
|
39
|
+
|
40
|
+
content = latest_revision.dig('slots', 'main', 'content') ||
|
41
|
+
latest_revision.dig('slots', '*') ||
|
42
|
+
latest_revision.dig('*')
|
56
43
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
end
|
44
|
+
wiki_file= File.basename(response_file, File.extname(response_file)) + '.wiki'
|
45
|
+
|
46
|
+
File.open(wiki_file,'w') do |f|
|
47
|
+
STDERR.puts "Warning: nil 'content' in #{Wikian::CONFIG_FILE}" unless content
|
48
|
+
STDERR.puts "Writing to #{wiki_file}"
|
49
|
+
f.puts content
|
64
50
|
end
|
65
51
|
|
66
52
|
rescue => e
|
67
|
-
puts "An error occurred while extracting the wikitext"
|
68
|
-
"Try using a new config file by pasing the '-t' option.",
|
69
|
-
"Or pass the '-d' option for debugging"
|
53
|
+
puts "An error occurred while extracting the wikitext"
|
70
54
|
exit
|
71
55
|
end
|
72
56
|
|
57
|
+
# save article metadata
|
58
|
+
#
|
59
|
+
# metadata like article timestamp is used to solve edit conflicts
|
60
|
+
def save_metadata
|
61
|
+
FileUtils.mkdir_p(Wikian.meta_dir)
|
62
|
+
|
63
|
+
metadata = File.exist?(Wikian.meta_file) ? YAML.load(File.open(Wikian.meta_file)) : {}
|
64
|
+
metadata['meta'].merge!(title => {'timestamp' => latest_revision['timestamp']})
|
65
|
+
|
66
|
+
File.write(Wikian.meta_file, YAML.dump(metadata))
|
67
|
+
end
|
68
|
+
|
73
69
|
def template
|
74
70
|
<<~eos
|
75
|
-
# for a list of parameters to use here see: https://www.mediawiki.org/wiki/API:
|
71
|
+
# for a list of parameters to use here see: https://www.mediawiki.org/wiki/API:Revisions
|
76
72
|
meta:
|
77
73
|
headers:
|
78
74
|
user-agent: Wikian
|
@@ -83,9 +79,10 @@ class Wikian
|
|
83
79
|
- revisions
|
84
80
|
rvprop:
|
85
81
|
- content
|
86
|
-
|
87
|
-
|
88
|
-
|
82
|
+
- timestamp
|
83
|
+
#rvsection: # get specific sections
|
84
|
+
# - 0
|
85
|
+
# - 2
|
89
86
|
rvslots:
|
90
87
|
- main
|
91
88
|
formatversion:
|
data/lib/wikian/monkeypatches.rb
CHANGED
data/lib/wikian/post.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
class Wikian
|
2
2
|
class WikianPostError < StandardError; end
|
3
3
|
class WikiFileError < WikianPostError; end
|
4
|
+
class WikiFileNameError < WikianPostError; end
|
5
|
+
class WikiMergeError < WikianPostError; end
|
4
6
|
|
5
7
|
class Post
|
6
8
|
attr_accessor :args, :baseurl, :header, :input_file, :debug, :login_token,
|
7
|
-
:login_cookie, :csrf_token, :csrf_cookie, :query, :body_text,
|
9
|
+
:login_cookie, :csrf_token, :csrf_cookie, :query, :body_text,
|
10
|
+
:username, :params, :latest_revision, :latest_content, :metadata
|
8
11
|
|
9
12
|
def initialize(args)
|
10
13
|
@args = args
|
11
14
|
|
15
|
+
long_to_short_options
|
16
|
+
|
12
17
|
# input wikitext file
|
13
|
-
@input_file = args.find{|f| File.exist? f}
|
14
|
-
raise WikiFileError unless input_file
|
18
|
+
raise WikiFileError unless @input_file = args.find{|f| File.exist? f}
|
15
19
|
|
16
|
-
site = input_file.match(/\.(.*)\.wiki/)[1
|
20
|
+
site = input_file.match(/\.(.*)\.wiki/)&.[](1)
|
21
|
+
raise(WikiFileNameError, "Use the Input file name convention <article_name>.<site>.wiki") unless site
|
17
22
|
|
18
23
|
@baseurl = "https://#{site}/w/api.php"
|
19
24
|
|
@@ -21,15 +26,20 @@ class Wikian
|
|
21
26
|
|
22
27
|
@username = ENV['WIKI_USER']
|
23
28
|
|
24
|
-
@debug = (args & %w(-d
|
29
|
+
@debug = (args & %w(-d)).length > 0 ? true : false
|
25
30
|
rescue => e
|
26
31
|
puts "#{e.class} in #{__FILE__}. #{e.message}"
|
27
32
|
exit
|
28
33
|
end
|
29
34
|
|
35
|
+
# transform long options like '--message' to short options like '-m'
|
36
|
+
def long_to_short_options
|
37
|
+
args.map! {|opt| opt[0,2] == '--' ? opt[1,2] : opt}
|
38
|
+
end
|
39
|
+
|
30
40
|
def post
|
31
41
|
# remove expired cookie
|
32
|
-
if expired_cookie? || args.have?(%w(-r
|
42
|
+
if expired_cookie? || args.have?(%w(-r))
|
33
43
|
FileUtils.rm_f(csrf_cookie_file)
|
34
44
|
end
|
35
45
|
|
@@ -41,10 +51,17 @@ class Wikian
|
|
41
51
|
|
42
52
|
get_csrf_cookie
|
43
53
|
end
|
44
|
-
get_csrf_token
|
45
|
-
|
46
54
|
build_query_string
|
47
55
|
|
56
|
+
get_latest_revision
|
57
|
+
|
58
|
+
if @body_text && Time.parse(params['starttimestamp']) < Time.parse(params['basetimestamp'])
|
59
|
+
puts "\e[31mEdit conflict detected, merging with latest version\e[m"
|
60
|
+
merge_versions
|
61
|
+
end
|
62
|
+
|
63
|
+
get_csrf_token
|
64
|
+
|
48
65
|
upload_article
|
49
66
|
end
|
50
67
|
|
@@ -84,9 +101,19 @@ class Wikian
|
|
84
101
|
puts(res.body) if debug
|
85
102
|
end
|
86
103
|
|
104
|
+
def get_latest_revision
|
105
|
+
res = URI.open("#{baseurl}?action=query&prop=revisions&titles=#{params['title']}&rvslots=main&rvprop=content|timestamp&format=json")
|
106
|
+
@latest_revision = JSON.parse(res.read).dig('query', 'pages').values.first.dig('revisions').first
|
107
|
+
params['basetimestamp'] = latest_revision['timestamp']
|
108
|
+
@latest_content = latest_revision.dig('slots', 'main', 'content') ||
|
109
|
+
latest_revision.dig('slots', 'main', '*') ||
|
110
|
+
latest_revision.dig('slots', '*') ||
|
111
|
+
latest_revision.dig('*')
|
112
|
+
end
|
113
|
+
|
87
114
|
def get_csrf_token
|
88
115
|
puts("\nGetting csrf token using csrf cookies") if debug
|
89
|
-
url = URI("#{baseurl}?action=query&meta=tokens&format=json&
|
116
|
+
url = URI("#{baseurl}?action=query&meta=tokens&format=json&prop=info|revisions&rvprop=timestamp")
|
90
117
|
res = URI.open(url, header.merge('cookie' => csrf_cookie))
|
91
118
|
json = JSON.parse(res.read)
|
92
119
|
@csrf_token = json.dig('query','tokens','csrftoken')
|
@@ -94,32 +121,51 @@ class Wikian
|
|
94
121
|
end
|
95
122
|
|
96
123
|
def build_query_string
|
97
|
-
params={}
|
124
|
+
@params={}
|
98
125
|
params['action'] = 'edit'
|
99
126
|
params['format'] = Wikian::RESPONSE_FORMAT
|
100
127
|
params['title'] = input_file.sub(/\..*/,'')
|
128
|
+
@metadata = File.exist?(Wikian.meta_file) ? YAML.load(File.open(Wikian.meta_file)) : {}
|
129
|
+
params['starttimestamp'] =
|
130
|
+
if timestamp = metadata.dig('meta', params['title'], 'timestamp')
|
131
|
+
timestamp
|
132
|
+
else
|
133
|
+
FileUtils.mkdir_p(Wikian.meta_dir)
|
134
|
+
metadata = {'meta' => {'title' => {'timestamp' => File.ctime(input_file).utc.iso8601}}}
|
135
|
+
File.write(Wikian.meta_file, YAML.dump(metadata))
|
136
|
+
end
|
101
137
|
wikitext = File.read(input_file)
|
102
|
-
if args.have?(%w(-a
|
138
|
+
if args.have?(%w(-a))
|
103
139
|
params['appendtext'] = wikitext
|
104
|
-
elsif args.have?(%w(-p
|
140
|
+
elsif args.have?(%w(-p))
|
105
141
|
params['prependtext'] = wikitext
|
106
142
|
else
|
107
143
|
# pass the wikitext in request body
|
108
144
|
@body_text = wikitext
|
109
145
|
end
|
110
|
-
if args.have?(%w(-c
|
146
|
+
if args.have?(%w(-c))
|
111
147
|
params['captchaid'], params['captchaword'] = args[args.index('-c')+1].split(':')
|
112
148
|
end
|
113
|
-
if args.have?(%w(-m
|
149
|
+
if args.have?(%w(-m))
|
114
150
|
params['summary'] = args[args.index('-m')+1]
|
115
151
|
end
|
116
|
-
if args.have?(%w(-s
|
152
|
+
if args.have?(%w(-s))
|
117
153
|
params['section'] = args[args.index('-s')+1]
|
118
154
|
end
|
119
|
-
|
155
|
+
end
|
156
|
+
|
157
|
+
def merge_versions
|
158
|
+
tmp_local = Tempfile.open {|f| f.write @body_text; f}
|
159
|
+
tmp_latest = Tempfile.open {|f| f.write latest_content; f}
|
160
|
+
@body_text = %x(diff --line-format %L #{tmp_local.path} #{tmp_latest.path})
|
161
|
+
metadata['meta'].merge!(params['title'] => {'timestamp' => Time.now.utc.iso8601})
|
162
|
+
rescue => e
|
163
|
+
puts "WikiMergeError in #{__FILE__}"
|
164
|
+
exit
|
120
165
|
end
|
121
166
|
|
122
167
|
def upload_article
|
168
|
+
@query = URI.encode_www_form(params)
|
123
169
|
puts("\nUploading the wiki article using csrf token #{csrf_token}") if debug
|
124
170
|
url = URI("#{baseurl}?#{query}")
|
125
171
|
req = Net::HTTP::Post.new(url, header.merge('cookie' => csrf_cookie, 'content-type' => 'application/x-www-form-urlencoded'))
|
data/lib/wikian/search.rb
CHANGED
@@ -6,13 +6,13 @@ class Wikian
|
|
6
6
|
def initialize(args)
|
7
7
|
super
|
8
8
|
|
9
|
-
@output_file =
|
9
|
+
@output_file = config['api']['srsearch'].first
|
10
10
|
|
11
11
|
@params.merge!('format' => Wikian::RESPONSE_FORMAT)
|
12
12
|
|
13
13
|
@query = @params.to_query
|
14
14
|
|
15
|
-
@api_url = URI("https://#{
|
15
|
+
@api_url = URI("https://#{config['meta']['site']}/w/api.php?#{query}")
|
16
16
|
rescue => e
|
17
17
|
puts "#{e.class} in #{__FILE__}. #{e.message}"
|
18
18
|
exit
|
data/lib/wikian/subcommand.rb
CHANGED
@@ -1,12 +1,11 @@
|
|
1
|
-
#!/usr/bin/env -S ruby -W0
|
2
1
|
class Wikian
|
3
2
|
class WikianSubcommandError < StandardError; end
|
4
|
-
class MissingConfigFileError < WikianSubcommandError; end
|
5
3
|
class BadUrlError < WikianSubcommandError; end
|
6
4
|
|
7
5
|
# class to be inherited by other Wikian classes
|
8
6
|
class Subcommand
|
9
|
-
attr_accessor :args, :res, :
|
7
|
+
attr_accessor :args, :res, :config, :query, :title, :api_url,
|
8
|
+
:debug, :output_file, :res_body
|
10
9
|
|
11
10
|
def initialize(args)
|
12
11
|
@args = args
|
@@ -19,11 +18,15 @@ class Wikian
|
|
19
18
|
|
20
19
|
@debug = (args & %w(-d --debug)).length > 0 ? true : false
|
21
20
|
|
22
|
-
|
23
|
-
|
21
|
+
@config =
|
22
|
+
if File.exist?(Wikian::CONFIG_FILE)
|
23
|
+
YAML.load(File.open(Wikian::CONFIG_FILE))
|
24
|
+
else
|
25
|
+
YAML.load(template)
|
26
|
+
end
|
24
27
|
|
25
28
|
# some params like 'titles' can contain multiple entries joined by '|'. More info in Wikipedia API docs
|
26
|
-
@params = Hash[
|
29
|
+
@params = Hash[config['api'].keys.zip(config['api'].values.map{|arr| arr.join("|")})]
|
27
30
|
rescue MissingConfigFileError => e
|
28
31
|
puts "#{e.class} try passing the '-t' option to generate #{Wikian::CONFIG_FILE} in #{__FILE__}"
|
29
32
|
exit
|
@@ -42,27 +45,23 @@ class Wikian
|
|
42
45
|
|
43
46
|
# HTTP response file name. Its extension depends on the 'content-type' header
|
44
47
|
def response_file
|
45
|
-
output_file + '.' + res['content-type'].split('/').last.sub(/;.*/,'')
|
48
|
+
output_file + '.' + res.meta['content-type'].split('/').last.sub(/;.*/,'')
|
46
49
|
end
|
47
50
|
|
48
51
|
# write response in to `response_file`
|
49
52
|
def write_response
|
50
53
|
STDERR.puts "Writing to #{response_file}"
|
51
54
|
File.open(response_file, 'w') do |f|
|
52
|
-
f.puts prettify(
|
55
|
+
f.puts prettify(res_body)
|
53
56
|
end
|
54
57
|
end
|
55
58
|
|
56
59
|
def doit
|
57
60
|
puts api_url if debug
|
58
61
|
|
59
|
-
|
62
|
+
@res=URI.open(api_url, config['meta']['headers'])
|
60
63
|
|
61
|
-
|
62
|
-
|
63
|
-
http.use_ssl = true
|
64
|
-
|
65
|
-
@res=http.request(req)
|
64
|
+
@res_body = res.read
|
66
65
|
|
67
66
|
write_response
|
68
67
|
rescue => e
|
@@ -74,7 +73,7 @@ class Wikian
|
|
74
73
|
|
75
74
|
# if response is JSON prettify it, otherwise return it unchanged
|
76
75
|
def prettify(str)
|
77
|
-
res['content-type'].match?('json') ? JSON.pretty_generate(JSON.parse(str)) : str
|
76
|
+
res.meta['content-type'].match?('json') ? JSON.pretty_generate(JSON.parse(str)) : str
|
78
77
|
end
|
79
78
|
end
|
80
79
|
end
|
data/lib/wikian/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wikian
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.12
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- sergioro
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Get and update Wikipedia articles
|
14
14
|
email:
|