wikian 0.1.12 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dac3dedbc9c20098849e966beda97855a9a133d8b9334ad26d5a2a651ce18fa2
4
- data.tar.gz: 347b7ead2d39dee453f122757f1cfb758559eb86e30ec924e55a6aca48101e4f
3
+ metadata.gz: 34b650b2c286774ef79f660715de782f54cd5b86d7690f6055dd662cdda8e1d0
4
+ data.tar.gz: 4b11d22fc6556d696b75dcd1d0373573e02a8eda58a02d661090be8faf03ec8d
5
5
  SHA512:
6
- metadata.gz: 518a100775e95f60f37925142dafea7614298c654ff5a87f5358966dc7e40c25a3551f06c547dac971d81bf3589100967815104ee4fc3d6e239788a87553b8b0
7
- data.tar.gz: a0230996611e118bb272dfdc83c8ef424bf180fb7b5fd51057f6ceee8a24e3ffe931493ce1b11ee77668d38c131346020d7e8342cd544c64147782d8e41d1c9d
6
+ metadata.gz: 9688be1c774d25208822ccba31d0381f512d9fb161c829631b22d70a824d841ed78b70303a32161f3826649fc1ec34a1af819724a1c34d6c0471d87cdf2ef92f
7
+ data.tar.gz: 5d3cb888c767d07f3ead8c9dbc56d962042dcb1a719274dd9e28493e8e146ed5807e646af17cf08219737b6e85f3ebe76c60d48854d73f4e1fa0438d2685b490
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- wikian (0.1.12)
4
+ wikian (0.2.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -13,6 +13,7 @@ require 'fileutils'
13
13
  require 'json'
14
14
  require 'net/http'
15
15
  require 'open-uri'
16
+ require 'tempfile'
16
17
  require 'yaml'
17
18
 
18
19
  class Wikian
@@ -43,6 +44,7 @@ class Wikian
43
44
  api = Wikian::Get.new(args)
44
45
  api.doit
45
46
  api.extract_wikitext
47
+ api.save_metadata
46
48
  elsif subcommand[0] == 's'
47
49
  api = Wikian::Search.new(args)
48
50
  api.doit
@@ -58,6 +60,15 @@ class Wikian
58
60
  puts "#{e.class} #{e.message} in #{__FILE__}"
59
61
  end
60
62
 
63
+ def self.meta_dir
64
+ '.wikian'
65
+ end
66
+
67
+ # file to store metadata of fetched articles
68
+ def self.meta_file
69
+ File.join(meta_dir, 'meta.yml')
70
+ end
71
+
61
72
  def help
62
73
  puts <<~eos
63
74
  Usage:
@@ -76,7 +87,7 @@ class Wikian
76
87
  -v, --version print version number
77
88
 
78
89
  Subcommands:
79
- c, contributions [N] get user last N contributions. N defaults to 20
90
+ c, contributions [N] get user last N contributions (defaults to #{Contributions::DEFAULT_MAX_CONTRIBUTIONS})
80
91
  g, get get wikitext from a Wikipedia article
81
92
  p, post post wikitext to a Wikipedia article
82
93
  s, search search wikitext in Wikipedia
@@ -11,15 +11,15 @@ class Wikian
11
11
 
12
12
  max_contributions = args.find(&:numeric?) || DEFAULT_MAX_CONTRIBUTIONS
13
13
 
14
- raise(BadUrlError, "Try passing the '-t' option") unless yaml['meta']['site']
14
+ raise(BadUrlError, "Try passing the '-t' option") unless config['meta']['site']
15
15
 
16
- @output_file = 'User:' + ENV['WIKI_USER'] + '.contributions.' + yaml['meta']['site']
16
+ @output_file = 'User:' + ENV['WIKI_USER'] + '.contributions.' + config['meta']['site']
17
17
 
18
18
  @params.merge!('ucuser' => ENV['WIKI_USER'], 'uclimit' => max_contributions, 'format' => Wikian::RESPONSE_FORMAT)
19
19
 
20
20
  @query = @params.to_query
21
21
 
22
- @api_url = URI("https://#{yaml['meta']['site']}/w/api.php?#{query}")
22
+ @api_url = URI("https://#{config['meta']['site']}/w/api.php?#{query}")
23
23
  rescue => e
24
24
  puts "#{e.class} in #{__FILE__}. #{e.message}"
25
25
  exit
@@ -1,17 +1,16 @@
1
1
  class Wikian
2
2
  class WikianGetError < StandardError; end
3
- class ExtractWikiError < WikianGetError; end
4
3
  class ArgumentRequiredError < WikianGetError; end
5
4
 
6
5
  class Get < Subcommand
7
- attr_accessor :title
6
+ attr_accessor :title, :url, :latest_revision
8
7
 
9
8
  def initialize(args)
10
9
  raise ArgumentRequiredError if args.empty?
11
10
 
12
11
  super
13
12
 
14
- url = URI(args.find{|arg| arg =~ URI.regexp})
13
+ @url = URI(args.find{|arg| arg =~ URI.regexp})
15
14
 
16
15
  raise BadUrlError unless url.path
17
16
 
@@ -33,46 +32,43 @@ class Wikian
33
32
  #
34
33
  # return: nil
35
34
  def extract_wikitext
36
- if !res['content-type'].match?('json') || !(pages = JSON.parse(res.body).dig('query','pages'))
37
- raise ExtractWikiError, 'JSON response has no pages'
38
- end
35
+ pages = JSON.parse(res_body).dig('query','pages')
39
36
 
40
- create_wiki = -> (title, revisions) do
41
- revisions.each do |revision|
42
- wiki_file= File.basename(response_file, File.extname(response_file)) + '.wiki'
43
- if revision['revid'].nil? && revisions.size > 1
44
- STDERR.puts "Warning: you should specify 'revid' in #{Wikian::CONFIG_FILE} to prevent overriding different revisions"
45
- end
46
- File.open(wiki_file,'w') do |f|
47
- content = revision.dig('slots', 'main', 'content') ||
48
- revision.dig('slots', '*') ||
49
- revision.dig('*')
50
- STDERR.puts "Warning: nil 'content' in #{Wikian::CONFIG_FILE}" unless content
51
- STDERR.puts "Writing to #{wiki_file}"
52
- f.puts content
53
- end
54
- end
55
- end
37
+ # Wikipedia is inconsistent in their value for 'pages', it's sometimes a hash, sometimes an array
38
+ @latest_revision = (pages.respond_to?(:keys) ? pages.values.first : pages.first)['revisions'].first
39
+
40
+ content = latest_revision.dig('slots', 'main', 'content') ||
41
+ latest_revision.dig('slots', '*') ||
42
+ latest_revision.dig('*')
56
43
 
57
- # this is ugly, but Wikipedia is inconsistent in their JSON value for 'pages'. Sometimes it's a hash, sometimes it's an array.
58
- if pages.respond_to? :keys
59
- create_wiki.call(pages.values.first['title'], pages.values.first['revisions'])
60
- else
61
- pages.each do |page|
62
- create_wiki.call(page['title'], page['revisions'])
63
- end
44
+ wiki_file= File.basename(response_file, File.extname(response_file)) + '.wiki'
45
+
46
+ File.open(wiki_file,'w') do |f|
47
+ STDERR.puts "Warning: nil 'content' in #{Wikian::CONFIG_FILE}" unless content
48
+ STDERR.puts "Writing to #{wiki_file}"
49
+ f.puts content
64
50
  end
65
51
 
66
52
  rescue => e
67
- puts "An error occurred while extracting the wikitext",
68
- "Try using a new config file by pasing the '-t' option.",
69
- "Or pass the '-d' option for debugging"
53
+ puts "An error occurred while extracting the wikitext"
70
54
  exit
71
55
  end
72
56
 
57
+ # save article metadata
58
+ #
59
+ # metadata like article timestamp is used to solve edit conflicts
60
+ def save_metadata
61
+ FileUtils.mkdir_p(Wikian.meta_dir)
62
+
63
+ metadata = File.exist?(Wikian.meta_file) ? YAML.load(File.open(Wikian.meta_file)) : {}
64
+ metadata['meta'].merge!(title => {'timestamp' => latest_revision['timestamp']})
65
+
66
+ File.write(Wikian.meta_file, YAML.dump(metadata))
67
+ end
68
+
73
69
  def template
74
70
  <<~eos
75
- # for a list of parameters to use here see: https://www.mediawiki.org/wiki/API:Get_the_contents_of_a_page
71
+ # for a list of parameters to use here see: https://www.mediawiki.org/wiki/API:Revisions
76
72
  meta:
77
73
  headers:
78
74
  user-agent: Wikian
@@ -83,9 +79,10 @@ class Wikian
83
79
  - revisions
84
80
  rvprop:
85
81
  - content
86
- #rvsection: # get specific sections
87
- # - 0
88
- # - 2
82
+ - timestamp
83
+ #rvsection: # get specific sections
84
+ # - 0
85
+ # - 2
89
86
  rvslots:
90
87
  - main
91
88
  formatversion:
@@ -9,7 +9,7 @@ end
9
9
  class Hash
10
10
  # return a query string representation of a hash
11
11
  def to_query
12
- URI.decode(URI.encode_www_form(self))
12
+ URI::DEFAULT_PARSER.unescape(URI.encode_www_form(self))
13
13
  end
14
14
  end
15
15
 
@@ -1,19 +1,24 @@
1
1
  class Wikian
2
2
  class WikianPostError < StandardError; end
3
3
  class WikiFileError < WikianPostError; end
4
+ class WikiFileNameError < WikianPostError; end
5
+ class WikiMergeError < WikianPostError; end
4
6
 
5
7
  class Post
6
8
  attr_accessor :args, :baseurl, :header, :input_file, :debug, :login_token,
7
- :login_cookie, :csrf_token, :csrf_cookie, :query, :body_text, :username
9
+ :login_cookie, :csrf_token, :csrf_cookie, :query, :body_text,
10
+ :username, :params, :latest_revision, :latest_content, :metadata
8
11
 
9
12
  def initialize(args)
10
13
  @args = args
11
14
 
15
+ long_to_short_options
16
+
12
17
  # input wikitext file
13
- @input_file = args.find{|f| File.exist? f}
14
- raise WikiFileError unless input_file
18
+ raise WikiFileError unless @input_file = args.find{|f| File.exist? f}
15
19
 
16
- site = input_file.match(/\.(.*)\.wiki/)[1]
20
+ site = input_file.match(/\.(.*)\.wiki/)&.[](1)
21
+ raise(WikiFileNameError, "Use the Input file name convention <article_name>.<site>.wiki") unless site
17
22
 
18
23
  @baseurl = "https://#{site}/w/api.php"
19
24
 
@@ -21,15 +26,20 @@ class Wikian
21
26
 
22
27
  @username = ENV['WIKI_USER']
23
28
 
24
- @debug = (args & %w(-d --debug)).length > 0 ? true : false
29
+ @debug = (args & %w(-d)).length > 0 ? true : false
25
30
  rescue => e
26
31
  puts "#{e.class} in #{__FILE__}. #{e.message}"
27
32
  exit
28
33
  end
29
34
 
35
+ # transform long options like '--message' to short options like '-m'
36
+ def long_to_short_options
37
+ args.map! {|opt| opt[0,2] == '--' ? opt[1,2] : opt}
38
+ end
39
+
30
40
  def post
31
41
  # remove expired cookie
32
- if expired_cookie? || args.have?(%w(-r --remove-cookie))
42
+ if expired_cookie? || args.have?(%w(-r))
33
43
  FileUtils.rm_f(csrf_cookie_file)
34
44
  end
35
45
 
@@ -41,10 +51,17 @@ class Wikian
41
51
 
42
52
  get_csrf_cookie
43
53
  end
44
- get_csrf_token
45
-
46
54
  build_query_string
47
55
 
56
+ get_latest_revision
57
+
58
+ if @body_text && Time.parse(params['starttimestamp']) < Time.parse(params['basetimestamp'])
59
+ puts "\e[31mEdit conflict detected, merging with latest version\e[m"
60
+ merge_versions
61
+ end
62
+
63
+ get_csrf_token
64
+
48
65
  upload_article
49
66
  end
50
67
 
@@ -84,9 +101,19 @@ class Wikian
84
101
  puts(res.body) if debug
85
102
  end
86
103
 
104
+ def get_latest_revision
105
+ res = URI.open("#{baseurl}?action=query&prop=revisions&titles=#{params['title']}&rvslots=main&rvprop=content|timestamp&format=json")
106
+ @latest_revision = JSON.parse(res.read).dig('query', 'pages').values.first.dig('revisions').first
107
+ params['basetimestamp'] = latest_revision['timestamp']
108
+ @latest_content = latest_revision.dig('slots', 'main', 'content') ||
109
+ latest_revision.dig('slots', 'main', '*') ||
110
+ latest_revision.dig('slots', '*') ||
111
+ latest_revision.dig('*')
112
+ end
113
+
87
114
  def get_csrf_token
88
115
  puts("\nGetting csrf token using csrf cookies") if debug
89
- url = URI("#{baseurl}?action=query&meta=tokens&format=json&type=csrf")
116
+ url = URI("#{baseurl}?action=query&meta=tokens&format=json&prop=info|revisions&rvprop=timestamp")
90
117
  res = URI.open(url, header.merge('cookie' => csrf_cookie))
91
118
  json = JSON.parse(res.read)
92
119
  @csrf_token = json.dig('query','tokens','csrftoken')
@@ -94,32 +121,51 @@ class Wikian
94
121
  end
95
122
 
96
123
  def build_query_string
97
- params={}
124
+ @params={}
98
125
  params['action'] = 'edit'
99
126
  params['format'] = Wikian::RESPONSE_FORMAT
100
127
  params['title'] = input_file.sub(/\..*/,'')
128
+ @metadata = File.exist?(Wikian.meta_file) ? YAML.load(File.open(Wikian.meta_file)) : {}
129
+ params['starttimestamp'] =
130
+ if timestamp = metadata.dig('meta', params['title'], 'timestamp')
131
+ timestamp
132
+ else
133
+ FileUtils.mkdir_p(Wikian.meta_dir)
134
+ metadata = {'meta' => {'title' => {'timestamp' => File.ctime(input_file).utc.iso8601}}}
135
+ File.write(Wikian.meta_file, YAML.dump(metadata))
136
+ end
101
137
  wikitext = File.read(input_file)
102
- if args.have?(%w(-a --append))
138
+ if args.have?(%w(-a))
103
139
  params['appendtext'] = wikitext
104
- elsif args.have?(%w(-p --prepend))
140
+ elsif args.have?(%w(-p))
105
141
  params['prependtext'] = wikitext
106
142
  else
107
143
  # pass the wikitext in request body
108
144
  @body_text = wikitext
109
145
  end
110
- if args.have?(%w(-c --captcha))
146
+ if args.have?(%w(-c))
111
147
  params['captchaid'], params['captchaword'] = args[args.index('-c')+1].split(':')
112
148
  end
113
- if args.have?(%w(-m --message))
149
+ if args.have?(%w(-m))
114
150
  params['summary'] = args[args.index('-m')+1]
115
151
  end
116
- if args.have?(%w(-s --section))
152
+ if args.have?(%w(-s))
117
153
  params['section'] = args[args.index('-s')+1]
118
154
  end
119
- @query = URI.encode_www_form(params)
155
+ end
156
+
157
+ def merge_versions
158
+ tmp_local = Tempfile.open {|f| f.write @body_text; f}
159
+ tmp_latest = Tempfile.open {|f| f.write latest_content; f}
160
+ @body_text = %x(diff --line-format %L #{tmp_local.path} #{tmp_latest.path})
161
+ metadata['meta'].merge!(params['title'] => {'timestamp' => Time.now.utc.iso8601})
162
+ rescue => e
163
+ puts "WikiMergeError in #{__FILE__}"
164
+ exit
120
165
  end
121
166
 
122
167
  def upload_article
168
+ @query = URI.encode_www_form(params)
123
169
  puts("\nUploading the wiki article using csrf token #{csrf_token}") if debug
124
170
  url = URI("#{baseurl}?#{query}")
125
171
  req = Net::HTTP::Post.new(url, header.merge('cookie' => csrf_cookie, 'content-type' => 'application/x-www-form-urlencoded'))
@@ -6,13 +6,13 @@ class Wikian
6
6
  def initialize(args)
7
7
  super
8
8
 
9
- @output_file = yaml['api']['srsearch'].first
9
+ @output_file = config['api']['srsearch'].first
10
10
 
11
11
  @params.merge!('format' => Wikian::RESPONSE_FORMAT)
12
12
 
13
13
  @query = @params.to_query
14
14
 
15
- @api_url = URI("https://#{yaml['meta']['site']}/w/api.php?#{query}")
15
+ @api_url = URI("https://#{config['meta']['site']}/w/api.php?#{query}")
16
16
  rescue => e
17
17
  puts "#{e.class} in #{__FILE__}. #{e.message}"
18
18
  exit
@@ -1,12 +1,11 @@
1
- #!/usr/bin/env -S ruby -W0
2
1
  class Wikian
3
2
  class WikianSubcommandError < StandardError; end
4
- class MissingConfigFileError < WikianSubcommandError; end
5
3
  class BadUrlError < WikianSubcommandError; end
6
4
 
7
5
  # class to be inherited by other Wikian classes
8
6
  class Subcommand
9
- attr_accessor :args, :res, :yaml, :query, :title, :api_url, :debug, :output_file
7
+ attr_accessor :args, :res, :config, :query, :title, :api_url,
8
+ :debug, :output_file, :res_body
10
9
 
11
10
  def initialize(args)
12
11
  @args = args
@@ -19,11 +18,15 @@ class Wikian
19
18
 
20
19
  @debug = (args & %w(-d --debug)).length > 0 ? true : false
21
20
 
22
- raise MissingConfigFileError unless File.exist?(Wikian::CONFIG_FILE)
23
- @yaml=YAML.load(File.open(Wikian::CONFIG_FILE))
21
+ @config =
22
+ if File.exist?(Wikian::CONFIG_FILE)
23
+ YAML.load(File.open(Wikian::CONFIG_FILE))
24
+ else
25
+ YAML.load(template)
26
+ end
24
27
 
25
28
  # some params like 'titles' can contain multiple entries joined by '|'. More info in Wikipedia API docs
26
- @params = Hash[yaml['api'].keys.zip(yaml['api'].values.map{|arr| arr.join("|")})]
29
+ @params = Hash[config['api'].keys.zip(config['api'].values.map{|arr| arr.join("|")})]
27
30
  rescue MissingConfigFileError => e
28
31
  puts "#{e.class} try passing the '-t' option to generate #{Wikian::CONFIG_FILE} in #{__FILE__}"
29
32
  exit
@@ -42,27 +45,23 @@ class Wikian
42
45
 
43
46
  # HTTP response file name. Its extension depends on the 'content-type' header
44
47
  def response_file
45
- output_file + '.' + res['content-type'].split('/').last.sub(/;.*/,'')
48
+ output_file + '.' + res.meta['content-type'].split('/').last.sub(/;.*/,'')
46
49
  end
47
50
 
48
51
  # write response in to `response_file`
49
52
  def write_response
50
53
  STDERR.puts "Writing to #{response_file}"
51
54
  File.open(response_file, 'w') do |f|
52
- f.puts prettify(res.body)
55
+ f.puts prettify(res_body)
53
56
  end
54
57
  end
55
58
 
56
59
  def doit
57
60
  puts api_url if debug
58
61
 
59
- req = Net::HTTP::Get.new(api_url, yaml['meta']['headers'])
62
+ @res=URI.open(api_url, config['meta']['headers'])
60
63
 
61
- http = Net::HTTP.new(api_url.host, api_url.port)
62
-
63
- http.use_ssl = true
64
-
65
- @res=http.request(req)
64
+ @res_body = res.read
66
65
 
67
66
  write_response
68
67
  rescue => e
@@ -74,7 +73,7 @@ class Wikian
74
73
 
75
74
  # if response is JSON prettify it, otherwise return it unchanged
76
75
  def prettify(str)
77
- res['content-type'].match?('json') ? JSON.pretty_generate(JSON.parse(str)) : str
76
+ res.meta['content-type'].match?('json') ? JSON.pretty_generate(JSON.parse(str)) : str
78
77
  end
79
78
  end
80
79
  end
@@ -1,3 +1,3 @@
1
1
  class Wikian
2
- VERSION = "0.1.12"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wikian
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.12
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - sergioro
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-13 00:00:00.000000000 Z
11
+ date: 2020-09-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Get and update Wikipedia articles
14
14
  email: