wikian 0.1.12 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: dac3dedbc9c20098849e966beda97855a9a133d8b9334ad26d5a2a651ce18fa2
- data.tar.gz: 347b7ead2d39dee453f122757f1cfb758559eb86e30ec924e55a6aca48101e4f
+ metadata.gz: 34b650b2c286774ef79f660715de782f54cd5b86d7690f6055dd662cdda8e1d0
+ data.tar.gz: 4b11d22fc6556d696b75dcd1d0373573e02a8eda58a02d661090be8faf03ec8d
  SHA512:
- metadata.gz: 518a100775e95f60f37925142dafea7614298c654ff5a87f5358966dc7e40c25a3551f06c547dac971d81bf3589100967815104ee4fc3d6e239788a87553b8b0
- data.tar.gz: a0230996611e118bb272dfdc83c8ef424bf180fb7b5fd51057f6ceee8a24e3ffe931493ce1b11ee77668d38c131346020d7e8342cd544c64147782d8e41d1c9d
+ metadata.gz: 9688be1c774d25208822ccba31d0381f512d9fb161c829631b22d70a824d841ed78b70303a32161f3826649fc1ec34a1af819724a1c34d6c0471d87cdf2ef92f
+ data.tar.gz: 5d3cb888c767d07f3ead8c9dbc56d962042dcb1a719274dd9e28493e8e146ed5807e646af17cf08219737b6e85f3ebe76c60d48854d73f4e1fa0438d2685b490
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- wikian (0.1.12)
+ wikian (0.2.0)

  GEM
  remote: https://rubygems.org/
@@ -13,6 +13,7 @@ require 'fileutils'
  require 'json'
  require 'net/http'
  require 'open-uri'
+ require 'tempfile'
  require 'yaml'

  class Wikian
@@ -43,6 +44,7 @@ class Wikian
  api = Wikian::Get.new(args)
  api.doit
  api.extract_wikitext
+ api.save_metadata
  elsif subcommand[0] == 's'
  api = Wikian::Search.new(args)
  api.doit
@@ -58,6 +60,15 @@ class Wikian
  puts "#{e.class} #{e.message} in #{__FILE__}"
  end

+ def self.meta_dir
+ '.wikian'
+ end
+
+ # file to store metadata of fetched articles
+ def self.meta_file
+ File.join(meta_dir, 'meta.yml')
+ end
+
  def help
  puts <<~eos
  Usage:
@@ -76,7 +87,7 @@ class Wikian
  -v, --version print version number

  Subcommands:
- c, contributions [N] get user last N contributions. N defaults to 20
+ c, contributions [N] get user last N contributions (defaults to #{Contributions::DEFAULT_MAX_CONTRIBUTIONS})
  g, get get wikitext from a Wikipedia article
  p, post post wikitext to a Wikipedia article
  s, search search wikitext in Wikipedia
@@ -11,15 +11,15 @@ class Wikian

  max_contributions = args.find(&:numeric?) || DEFAULT_MAX_CONTRIBUTIONS

- raise(BadUrlError, "Try passing the '-t' option") unless yaml['meta']['site']
+ raise(BadUrlError, "Try passing the '-t' option") unless config['meta']['site']

- @output_file = 'User:' + ENV['WIKI_USER'] + '.contributions.' + yaml['meta']['site']
+ @output_file = 'User:' + ENV['WIKI_USER'] + '.contributions.' + config['meta']['site']

  @params.merge!('ucuser' => ENV['WIKI_USER'], 'uclimit' => max_contributions, 'format' => Wikian::RESPONSE_FORMAT)

  @query = @params.to_query

- @api_url = URI("https://#{yaml['meta']['site']}/w/api.php?#{query}")
+ @api_url = URI("https://#{config['meta']['site']}/w/api.php?#{query}")
  rescue => e
  puts "#{e.class} in #{__FILE__}. #{e.message}"
  exit
@@ -1,17 +1,16 @@
  class Wikian
  class WikianGetError < StandardError; end
- class ExtractWikiError < WikianGetError; end
  class ArgumentRequiredError < WikianGetError; end

  class Get < Subcommand
- attr_accessor :title
+ attr_accessor :title, :url, :latest_revision

  def initialize(args)
  raise ArgumentRequiredError if args.empty?

  super

- url = URI(args.find{|arg| arg =~ URI.regexp})
+ @url = URI(args.find{|arg| arg =~ URI.regexp})

  raise BadUrlError unless url.path

@@ -33,46 +32,43 @@ class Wikian
  #
  # return: nil
  def extract_wikitext
- if !res['content-type'].match?('json') || !(pages = JSON.parse(res.body).dig('query','pages'))
- raise ExtractWikiError, 'JSON response has no pages'
- end
+ pages = JSON.parse(res_body).dig('query','pages')

- create_wiki = -> (title, revisions) do
- revisions.each do |revision|
- wiki_file= File.basename(response_file, File.extname(response_file)) + '.wiki'
- if revision['revid'].nil? && revisions.size > 1
- STDERR.puts "Warning: you should specify 'revid' in #{Wikian::CONFIG_FILE} to prevent overriding different revisions"
- end
- File.open(wiki_file,'w') do |f|
- content = revision.dig('slots', 'main', 'content') ||
- revision.dig('slots', '*') ||
- revision.dig('*')
- STDERR.puts "Warning: nil 'content' in #{Wikian::CONFIG_FILE}" unless content
- STDERR.puts "Writing to #{wiki_file}"
- f.puts content
- end
- end
- end
+ # Wikipedia is inconsistent in their value for 'pages', it's sometimes a hash, sometimes an array
+ @latest_revision = (pages.respond_to?(:keys) ? pages.values.first : pages.first)['revisions'].first
+
+ content = latest_revision.dig('slots', 'main', 'content') ||
+ latest_revision.dig('slots', '*') ||
+ latest_revision.dig('*')

- # this is ugly, but Wikipedia is inconsistent in their JSON value for 'pages'. Sometimes it's a hash, sometimes it's an array.
- if pages.respond_to? :keys
- create_wiki.call(pages.values.first['title'], pages.values.first['revisions'])
- else
- pages.each do |page|
- create_wiki.call(page['title'], page['revisions'])
- end
+ wiki_file= File.basename(response_file, File.extname(response_file)) + '.wiki'
+
+ File.open(wiki_file,'w') do |f|
+ STDERR.puts "Warning: nil 'content' in #{Wikian::CONFIG_FILE}" unless content
+ STDERR.puts "Writing to #{wiki_file}"
+ f.puts content
  end

  rescue => e
- puts "An error occurred while extracting the wikitext",
- "Try using a new config file by pasing the '-t' option.",
- "Or pass the '-d' option for debugging"
+ puts "An error occurred while extracting the wikitext"
  exit
  end

+ # save article metadata
+ #
+ # metadata like article timestamp is used to solve edit conflicts
+ def save_metadata
+ FileUtils.mkdir_p(Wikian.meta_dir)
+
+ metadata = File.exist?(Wikian.meta_file) ? YAML.load(File.open(Wikian.meta_file)) : {}
+ metadata['meta'].merge!(title => {'timestamp' => latest_revision['timestamp']})
+
+ File.write(Wikian.meta_file, YAML.dump(metadata))
+ end
+
  def template
  <<~eos
- # for a list of parameters to use here see: https://www.mediawiki.org/wiki/API:Get_the_contents_of_a_page
+ # for a list of parameters to use here see: https://www.mediawiki.org/wiki/API:Revisions
  meta:
  headers:
  user-agent: Wikian
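
The save_metadata method added above records, for each fetched article, the timestamp of the downloaded revision in the new .wikian/meta.yml file (Wikian.meta_dir / Wikian.meta_file). Below is a minimal sketch of what that file ends up holding; the article title and timestamp are made up, and the sketch seeds an empty 'meta' hash so the merge works on a first run.

```ruby
require 'yaml'
require 'fileutils'

# Illustrative sketch only: mirrors the shape of save_metadata above with
# made-up values ('Some_article' and the timestamp are not from the gem).
meta_dir  = '.wikian'
meta_file = File.join(meta_dir, 'meta.yml')

FileUtils.mkdir_p(meta_dir)
metadata = File.exist?(meta_file) ? YAML.load(File.read(meta_file)) : {'meta' => {}}
metadata['meta'].merge!('Some_article' => {'timestamp' => '2020-09-15T12:34:56Z'})
File.write(meta_file, YAML.dump(metadata))

puts File.read(meta_file)
# ---
# meta:
#   Some_article:
#     timestamp: '2020-09-15T12:34:56Z'
```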
@@ -83,9 +79,10 @@ class Wikian
  - revisions
  rvprop:
  - content
- #rvsection: # get specific sections
- # - 0
- # - 2
+ - timestamp
+ #rvsection: # get specific sections
+ # - 0
+ # - 2
  rvslots:
  - main
  formatversion:
@@ -9,7 +9,7 @@ end
  class Hash
  # return a query string representation of a hash
  def to_query
- URI.decode(URI.encode_www_form(self))
+ URI::DEFAULT_PARSER.unescape(URI.encode_www_form(self))
  end
  end

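On the Hash#to_query change above: URI.decode had long been deprecated and was removed in Ruby 3.0, so the gem switches to URI::DEFAULT_PARSER.unescape, which undoes the percent-encoding applied by URI.encode_www_form. A small sketch of the pattern with made-up parameters:

```ruby
require 'uri'

# Same call pattern as the new Hash#to_query above, on a standalone hash
# (the parameter values are made up).
params  = { 'action' => 'query', 'titles' => 'Ruby (programming language)' }
encoded = URI.encode_www_form(params)
# => "action=query&titles=Ruby+%28programming+language%29"
puts URI::DEFAULT_PARSER.unescape(encoded)
# => "action=query&titles=Ruby+(programming+language)"
```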
@@ -1,19 +1,24 @@
  class Wikian
  class WikianPostError < StandardError; end
  class WikiFileError < WikianPostError; end
+ class WikiFileNameError < WikianPostError; end
+ class WikiMergeError < WikianPostError; end

  class Post
  attr_accessor :args, :baseurl, :header, :input_file, :debug, :login_token,
- :login_cookie, :csrf_token, :csrf_cookie, :query, :body_text, :username
+ :login_cookie, :csrf_token, :csrf_cookie, :query, :body_text,
+ :username, :params, :latest_revision, :latest_content, :metadata

  def initialize(args)
  @args = args

+ long_to_short_options
+
  # input wikitext file
- @input_file = args.find{|f| File.exist? f}
- raise WikiFileError unless input_file
+ raise WikiFileError unless @input_file = args.find{|f| File.exist? f}

- site = input_file.match(/\.(.*)\.wiki/)[1]
+ site = input_file.match(/\.(.*)\.wiki/)&.[](1)
+ raise(WikiFileNameError, "Use the Input file name convention <article_name>.<site>.wiki") unless site

  @baseurl = "https://#{site}/w/api.php"

@@ -21,15 +26,20 @@ class Wikian

  @username = ENV['WIKI_USER']

- @debug = (args & %w(-d --debug)).length > 0 ? true : false
+ @debug = (args & %w(-d)).length > 0 ? true : false
  rescue => e
  puts "#{e.class} in #{__FILE__}. #{e.message}"
  exit
  end

+ # transform long options like '--message' to short options like '-m'
+ def long_to_short_options
+ args.map! {|opt| opt[0,2] == '--' ? opt[1,2] : opt}
+ end
+
  def post
  # remove expired cookie
- if expired_cookie? || args.have?(%w(-r --remove-cookie))
+ if expired_cookie? || args.have?(%w(-r))
  FileUtils.rm_f(csrf_cookie_file)
  end

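The long_to_short_options helper above works because, for an argument beginning with '--', opt[1,2] takes the two characters starting at index 1: the second dash plus the option's first letter, so '--message' becomes '-m'. That is why the option checks later in this file only need to match the short forms. A quick sketch with made-up arguments:

```ruby
# Illustrative only: the same map! as long_to_short_options, applied to a
# made-up argument list.
args = ['--message', 'fix typo', '--append', 'Some_article.en.wikipedia.org.wiki']
args.map! { |opt| opt[0, 2] == '--' ? opt[1, 2] : opt }
p args
# => ["-m", "fix typo", "-a", "Some_article.en.wikipedia.org.wiki"]
```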
@@ -41,10 +51,17 @@ class Wikian

  get_csrf_cookie
  end
- get_csrf_token
-
  build_query_string

+ get_latest_revision
+
+ if @body_text && Time.parse(params['starttimestamp']) < Time.parse(params['basetimestamp'])
+ puts "\e[31mEdit conflict detected, merging with latest version\e[m"
+ merge_versions
+ end
+
+ get_csrf_token
+
  upload_article
  end

@@ -84,9 +101,19 @@ class Wikian
  puts(res.body) if debug
  end

+ def get_latest_revision
+ res = URI.open("#{baseurl}?action=query&prop=revisions&titles=#{params['title']}&rvslots=main&rvprop=content|timestamp&format=json")
+ @latest_revision = JSON.parse(res.read).dig('query', 'pages').values.first.dig('revisions').first
+ params['basetimestamp'] = latest_revision['timestamp']
+ @latest_content = latest_revision.dig('slots', 'main', 'content') ||
+ latest_revision.dig('slots', 'main', '*') ||
+ latest_revision.dig('slots', '*') ||
+ latest_revision.dig('*')
+ end
+
  def get_csrf_token
  puts("\nGetting csrf token using csrf cookies") if debug
- url = URI("#{baseurl}?action=query&meta=tokens&format=json&type=csrf")
+ url = URI("#{baseurl}?action=query&meta=tokens&format=json&prop=info|revisions&rvprop=timestamp")
  res = URI.open(url, header.merge('cookie' => csrf_cookie))
  json = JSON.parse(res.read)
  @csrf_token = json.dig('query','tokens','csrftoken')
@@ -94,32 +121,51 @@ class Wikian
  end

  def build_query_string
- params={}
+ @params={}
  params['action'] = 'edit'
  params['format'] = Wikian::RESPONSE_FORMAT
  params['title'] = input_file.sub(/\..*/,'')
+ @metadata = File.exist?(Wikian.meta_file) ? YAML.load(File.open(Wikian.meta_file)) : {}
+ params['starttimestamp'] =
+ if timestamp = metadata.dig('meta', params['title'], 'timestamp')
+ timestamp
+ else
+ FileUtils.mkdir_p(Wikian.meta_dir)
+ metadata = {'meta' => {'title' => {'timestamp' => File.ctime(input_file).utc.iso8601}}}
+ File.write(Wikian.meta_file, YAML.dump(metadata))
+ end
  wikitext = File.read(input_file)
- if args.have?(%w(-a --append))
+ if args.have?(%w(-a))
  params['appendtext'] = wikitext
- elsif args.have?(%w(-p --prepend))
+ elsif args.have?(%w(-p))
  params['prependtext'] = wikitext
  else
  # pass the wikitext in request body
  @body_text = wikitext
  end
- if args.have?(%w(-c --captcha))
+ if args.have?(%w(-c))
  params['captchaid'], params['captchaword'] = args[args.index('-c')+1].split(':')
  end
- if args.have?(%w(-m --message))
+ if args.have?(%w(-m))
  params['summary'] = args[args.index('-m')+1]
  end
- if args.have?(%w(-s --section))
+ if args.have?(%w(-s))
  params['section'] = args[args.index('-s')+1]
  end
- @query = URI.encode_www_form(params)
+ end
+
+ def merge_versions
+ tmp_local = Tempfile.open {|f| f.write @body_text; f}
+ tmp_latest = Tempfile.open {|f| f.write latest_content; f}
+ @body_text = %x(diff --line-format %L #{tmp_local.path} #{tmp_latest.path})
+ metadata['meta'].merge!(params['title'] => {'timestamp' => Time.now.utc.iso8601})
+ rescue => e
+ puts "WikiMergeError in #{__FILE__}"
+ exit
  end

  def upload_article
+ @query = URI.encode_www_form(params)
  puts("\nUploading the wiki article using csrf token #{csrf_token}") if debug
  url = URI("#{baseurl}?#{query}")
  req = Net::HTTP::Post.new(url, header.merge('cookie' => csrf_cookie, 'content-type' => 'application/x-www-form-urlencoded'))
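
The edit-conflict path above leans on GNU diff's --line-format option: with %L as the format for old, new, and unchanged lines alike, diff prints every input line, so the result is a plain union of the local wikitext and the latest remote revision. Nothing decides which side wins; conflicting lines from both versions simply end up next to each other. A rough sketch of that merge, assuming GNU diff is on PATH and using made-up texts:

```ruby
require 'tempfile'

# Illustrative only: the same Tempfile + diff pattern as merge_versions above,
# run on two made-up versions of an article. Requires GNU diff.
local  = "Intro\nLocal edit\nOutro\n"
latest = "Intro\nRemote edit\nOutro\n"

tmp_local  = Tempfile.open { |f| f.write(local);  f }
tmp_latest = Tempfile.open { |f| f.write(latest); f }

puts %x(diff --line-format=%L #{tmp_local.path} #{tmp_latest.path})
# Intro
# Local edit
# Remote edit
# Outro
```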
@@ -6,13 +6,13 @@ class Wikian
  def initialize(args)
  super

- @output_file = yaml['api']['srsearch'].first
+ @output_file = config['api']['srsearch'].first

  @params.merge!('format' => Wikian::RESPONSE_FORMAT)

  @query = @params.to_query

- @api_url = URI("https://#{yaml['meta']['site']}/w/api.php?#{query}")
+ @api_url = URI("https://#{config['meta']['site']}/w/api.php?#{query}")
  rescue => e
  puts "#{e.class} in #{__FILE__}. #{e.message}"
  exit
@@ -1,12 +1,11 @@
- #!/usr/bin/env -S ruby -W0
  class Wikian
  class WikianSubcommandError < StandardError; end
- class MissingConfigFileError < WikianSubcommandError; end
  class BadUrlError < WikianSubcommandError; end

  # class to be inherited by other Wikian classes
  class Subcommand
- attr_accessor :args, :res, :yaml, :query, :title, :api_url, :debug, :output_file
+ attr_accessor :args, :res, :config, :query, :title, :api_url,
+ :debug, :output_file, :res_body

  def initialize(args)
  @args = args
@@ -19,11 +18,15 @@ class Wikian

  @debug = (args & %w(-d --debug)).length > 0 ? true : false

- raise MissingConfigFileError unless File.exist?(Wikian::CONFIG_FILE)
- @yaml=YAML.load(File.open(Wikian::CONFIG_FILE))
+ @config =
+ if File.exist?(Wikian::CONFIG_FILE)
+ YAML.load(File.open(Wikian::CONFIG_FILE))
+ else
+ YAML.load(template)
+ end

  # some params like 'titles' can contain multiple entries joined by '|'. More info in Wikipedia API docs
- @params = Hash[yaml['api'].keys.zip(yaml['api'].values.map{|arr| arr.join("|")})]
+ @params = Hash[config['api'].keys.zip(config['api'].values.map{|arr| arr.join("|")})]
  rescue MissingConfigFileError => e
  puts "#{e.class} try passing the '-t' option to generate #{Wikian::CONFIG_FILE} in #{__FILE__}"
  exit
@@ -42,27 +45,23 @@ class Wikian

  # HTTP response file name. Its extension depends on the 'content-type' header
  def response_file
- output_file + '.' + res['content-type'].split('/').last.sub(/;.*/,'')
+ output_file + '.' + res.meta['content-type'].split('/').last.sub(/;.*/,'')
  end

  # write response in to `response_file`
  def write_response
  STDERR.puts "Writing to #{response_file}"
  File.open(response_file, 'w') do |f|
- f.puts prettify(res.body)
+ f.puts prettify(res_body)
  end
  end

  def doit
  puts api_url if debug

- req = Net::HTTP::Get.new(api_url, yaml['meta']['headers'])
+ @res=URI.open(api_url, config['meta']['headers'])

- http = Net::HTTP.new(api_url.host, api_url.port)
-
- http.use_ssl = true
-
- @res=http.request(req)
+ @res_body = res.read

  write_response
  rescue => e
@@ -74,7 +73,7 @@ class Wikian

  # if response is JSON prettify it, otherwise return it unchanged
  def prettify(str)
- res['content-type'].match?('json') ? JSON.pretty_generate(JSON.parse(str)) : str
+ res.meta['content-type'].match?('json') ? JSON.pretty_generate(JSON.parse(str)) : str
  end
  end
  end
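
The Subcommand changes above replace the hand-rolled Net::HTTP request with OpenURI: URI.open accepts request headers as a hash, read returns the body, and meta exposes the (lower-cased) response headers, which is why res['content-type'] becomes res.meta['content-type']. A minimal sketch of that pattern against a placeholder URL:

```ruby
require 'open-uri'
require 'json'

# Illustrative only: the OpenURI calls that doit, response_file and prettify
# now rely on; the URL and header value are placeholders.
res  = URI.open('https://en.wikipedia.org/w/api.php?action=query&format=json',
                'user-agent' => 'Wikian')
body = res.read

puts res.meta['content-type']   # e.g. "application/json; charset=utf-8"
puts JSON.pretty_generate(JSON.parse(body)) if res.meta['content-type'].match?('json')
```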
@@ -1,3 +1,3 @@
  class Wikian
- VERSION = "0.1.12"
+ VERSION = "0.2.0"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: wikian
  version: !ruby/object:Gem::Version
- version: 0.1.12
+ version: 0.2.0
  platform: ruby
  authors:
  - sergioro
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2020-09-13 00:00:00.000000000 Z
+ date: 2020-09-15 00:00:00.000000000 Z
  dependencies: []
  description: Get and update Wikipedia articles
  email: