sunflower 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -1,4 +1,4 @@
1
- Version: 0.4.3 alpha
1
+ Version: 0.4.4 alpha
2
2
 
3
3
  >>> English:
4
4
 
@@ -2,7 +2,6 @@
2
2
  # extends Page with some methods that make it easy to perform common tasks
3
3
 
4
4
  class Page
5
- def execute commands
6
5
  # executes methods on self
7
6
  # "commands" is array of arrays
8
7
  # page.execute([
@@ -18,6 +17,7 @@ class Page
18
17
  # oi:module, only-if:module
19
18
  # !oi:module, only-if-not:module
20
19
  # s:append to summary, summary:append to summary
20
+ def execute commands
21
21
  originalText = self.text.dup
22
22
 
23
23
  commands.each do |cmd|
@@ -75,9 +75,9 @@ class Page
75
75
 
76
76
 
77
77
 
78
- def replace from, to, once=false
79
78
  # replaces "from" with "to" in page text
80
79
  # "from" may be regex
80
+ def replace from, to, once=false
81
81
  self.text = self.text.send( (once ? 'sub' : 'gsub'), from, to )
82
82
  end
83
83
  def gsub from, to
@@ -87,31 +87,23 @@ class Page
87
87
  self.replace from, to, true
88
88
  end
89
89
 
90
- def append txt, newlines=2
91
90
  # appends newlines and text
92
91
  # by default - 2 newlines
92
+ def append txt, newlines=2
93
93
  self.text = self.text.rstrip + ("\n"*newlines) + txt
94
94
  end
95
95
 
96
- def prepend txt, newlines=2
97
96
  # prepends text and newlines
98
97
  # by default - 2 newlines
98
+ def prepend txt, newlines=2
99
99
  self.text = txt + ("\n"*newlines) + self.text.lstrip
100
100
  end
101
101
 
102
- def code_cleanup
103
- # simple, safe code cleanup
104
- # use Sunflower.always_do_code_cleanup=true to do it automatically just before saving page
102
+ # plwiki-specific cleanup routines.
105
103
  # based on Nux's cleaner: http://pl.wikipedia.org/wiki/Wikipedysta:Nux/wp_sk.js
106
- str=self.text.gsub(/\r\n/,"\n")
104
+ def code_cleanup_plwiki str
105
+ str = str.dup
107
106
 
108
- str.gsub!(/\{\{\s*([^|{}]+ |uni|)stub2?(\|[^{}]+)?\}\}/i){
109
- if $1=='sekcja '
110
- '{{sekcja stub}}'
111
- else
112
- '{{stub}}'
113
- end
114
- }
115
107
  str.gsub!(/\{\{\{(?:poprzednik|następca|pop|nast|lata|info|lang)\|(.+?)\}\}\}/i,'\1')
116
108
  str.gsub!(/(={1,5})\s*Przypisy\s*\1\s*<references\s?\/>/i){
117
109
  if $1=='=' || $1=='=='
@@ -121,21 +113,6 @@ class Page
121
113
  end
122
114
  }
123
115
 
124
- str.gsub!(/\[\[([^\|#\]]*)([^\|\]]*)(\||\]\])/){
125
- name, anchor, _end = $1, $2, $3
126
-
127
- begin
128
- name = name.gsub(/((?:%[0-9a-fA-F]{2})+)/){ [$1.delete('%')].pack('H*') }
129
- anchor = (anchor||'').gsub(/\.([0-9A-F]{2})/, '%\1').gsub(/((?:%[0-9a-fA-F]{2})+)/){ [$1.delete('%')].pack('H*') }
130
- a='[['+name+anchor+(_end||'')
131
- a=a.gsub '_', ' '
132
- rescue
133
- a=('[['+name+(anchor||'')+(_end||'')).gsub '_', ' '
134
- end
135
-
136
- a
137
- }
138
-
139
116
  # sklejanie skrótów linkowych
140
117
  str.gsub!(/m\.? ?\[\[n\.? ?p\.? ?m\.?\]\]/, 'm [[n.p.m.]]');
141
118
 
@@ -165,102 +142,46 @@ class Page
165
142
  str.gsub!(/[ \n\t]*\n'''? *((Zewnętrzn[ey] )?(Linki?|Łącza|Stron[ay]|Zobacz w (internecie|sieci))( zewn[eę]trzn[aey])?):* *'''?[ \n\t]*/i, "\n\n== Linki zewnętrzne ==\n");
166
143
  str.gsub!(/[ \n\t]*\n(=+) *((Zewnętrzn[ey] )?(Linki?|Łącza|Stron[ay]|Zobacz w (internecie|sieci))( zewn[eę]trzn[aey])?):* *=+[ \n\t]*/i, "\n\n\\1 Linki zewnętrzne \\1\n");
167
144
 
168
- # nagłówki
145
+ return str
146
+ end
147
+
148
+ # simple, safe code cleanup
149
+ # use Sunflower.always_do_code_cleanup=true to do it automatically just before saving page
150
+ def code_cleanup
151
+ str = self.text.gsub /\r\n/, "\n"
152
+
153
+ str.gsub!(/\[\[([^\|\]]+)(\||\]\])/){
154
+ name, rest = $1, $2
155
+ "[[#{self.sunflower.cleanup_title name}#{rest}"
156
+ }
157
+
158
+ # headings
169
159
  str.gsub!(/(^|\n)(=+) *([^=\n]*[^ :=\n])[ :]*=/, '\1\2 \3 ='); # =a= > = a =, =a:= > = a =
170
- str.gsub!(/(^|\n)(=+[^=\n]+=+)[\n]{2,}/, "\\1\\2\n"); # jeden \n
160
+ str.gsub!(/(^|\n)(=+[^=\n]+=+)[\n]{2,}/, "\\1\\2\n"); # one newline
171
161
 
172
- # listy ze spacjami
162
+ # spaced lists
173
163
  str.gsub!(/(\n[#*:;]+)([^ \t\n#*:;{])/, '\1 \2');
174
164
 
175
- # poprawa nazw przestrzeni i drobne okoliczne
176
- str.gsub!(/\[\[(:?) *(image|grafika|file|plik) *: *([^ ])/i){'[['+$1+'Plik:'+$3.upcase}
177
- str.gsub!(/\[\[(:?) *(category|kategoria) *: *([^ ])/i){'[['+$1+'Kategoria:'+$3.upcase}
178
- str.gsub!(/\[\[ *(:?) *(template|szablon) *: *([^ ])/i){'[['+'Szablon:'+$3.upcase}
179
- str.gsub!(/\[\[ *(:?) *(special|specjalna) *: *([^ ])/i){'[['+'Specjalna:'+$3.upcase}
180
-
181
- 3.times { str.gsub!('{{stub}}{{stub}}', '{{stub}}') }
165
+ if wikiid = self.sunflower.siteinfo['general']['wikiid']
166
+ if self.respond_to? :"code_cleanup_#{wikiid}"
167
+ str = self.call :"code_cleanup_#{wikiid}", str
168
+ end
169
+ end
182
170
 
183
171
  self.text = str
184
172
  end
185
-
186
- def friendly_infobox
187
- # cleans up infoboxes
188
- # might make mistakes! use at your own risk!
189
- def makeFriendly(nazwa,zaw)
190
- zaw.gsub!(/<!--.+?-->/,'')
191
- nazwa=nazwa.gsub('_',' ').strip
192
-
193
- #escapowanie parametrów
194
- zaw.gsub!(/<<<(#+)>>>/,"<<<#\\1>>>")
195
- #wewnętrzne szablony
196
- while zaw=~/\{\{[^}]+\|[^}]+\}\}/
197
- zaw.gsub!($&,$&.gsub(/\|/,'<<<#>>>'))
198
- end
199
- #wewnętrzne linki
200
- while zaw=~/\[\[[^\]]+\|[^\]]+\]\]/
201
- zaw.gsub!($&,$&.gsub(/\|/,'<<<#>>>'))
202
- end
203
-
204
- zaw.sub!(/\A\s*\|\s*/,'') #usunięcie pierwszego pipe'a
205
- lines=zaw.split('|')
206
-
207
- # te tablice przechowują odpowiednio nazwy i wartości kolejnych parametrów
208
- names=[]
209
- values=[]
210
-
211
- for line in lines
212
- line.gsub!(/<<<#>>>/,'|')
213
- line.gsub!(/<<<#(#+)>>>/,"<<<\\1>>>") #odescapowanie
214
-
215
- line=~/\A\s*(.+?)\s*=\s*([\s\S]*?)\s*\Z/
216
- if $&==nil
217
- next
218
- end
219
- name=$1.strip
220
- value=$2.strip
221
-
222
- names<<name
223
- values<<value
224
- end
225
-
226
- zaw=''
227
- names.each_index{|i|
228
- zaw+=' | '+names[i]+' = '+values[i]+"\n"
229
- }
230
-
231
- # grupowane koordynaty
232
- zaw.gsub!(/\s*\| minut/, ' | minut')
233
- zaw.gsub!(/\s*\| sekund/, ' | sekund')
234
-
235
- return '{{'+nazwa[0,1].upcase+nazwa[1,999]+"\n"+zaw+'}}'+"\n"
236
- end
237
173
 
238
- nstr=''
239
- while str!=''
240
- str=~/(\s*)\{\{([^|}]+[ _]infobo[^|}]+|[wW]ładca)((?:[^{}]|[^{}][{}][^{}]|\{\{(?:[^{}]|[^{}][{}][^{}]|\{\{[^{}]+\}\})+\}\})+)\}\}(?:\s*)/
241
-
242
- spaces=($1!='' ? "\n" : '')
243
- before=($`==nil ? '' : $`)
244
- name=$2
245
- inner=$3
246
- match=$&
247
- if match!=nil
248
- result=makeFriendly(name,inner)
249
- nstr+=before+spaces+result
250
- else
251
- nstr+=str
252
- break
253
- end
254
-
255
- str=str.sub(before+match,'')
256
- end
257
-
258
- self.text = nstr
259
- end
260
-
174
+ # Replace the category "from" with the category "to" in the page wikitext.
175
+ #
176
+ # Inputs can be either with the Category: prefix (or localised version) or without.
261
177
  def change_category from, to
262
- from=from.sub(/\A\s*([cC]ategory|[kK]ategoria):/, '').strip
263
- to=to.sub(/\A\s*([cC]ategory|[kK]ategoria):/, '').strip
264
- self.text = self.text.gsub(/\[\[ *(?:[cC]ategory|[kK]ategoria) *: *#{Regexp.escape from} *(\|[^\]]*|)\]\]/){'[[Kategoria:'+to+($1=='| ' ? $1 : $1.rstrip)+']]'}
178
+ cat_regex = self.sunflower.ns_regex_for 'Category'
179
+ from = self.sunflower.cleanup_title(from).sub(/^#{cat_regex}:/, '')
180
+ to = self.sunflower.cleanup_title(to ).sub(/^#{cat_regex}:/, '')
181
+
182
+ self.text.gsub!(/\[\[ *#{cat_regex} *: *#{Regexp.escape from} *(\||\]\])/){
183
+ rest = $1
184
+ "[[#{self.sunflower.ns_local_for 'Category'}:#{to}#{rest}"
185
+ }
265
186
  end
266
- end
187
+ end
@@ -19,7 +19,7 @@ class SunflowerError < StandardError; end
19
19
  #
20
20
  # You can use multiple Sunflowers at once, to work on multiple wikis.
21
21
  class Sunflower
22
- VERSION = '0.4.3'
22
+ VERSION = '0.4.4'
23
23
 
24
24
  INVALID_CHARS = %w(# < > [ ] | { })
25
25
  INVALID_CHARS_REGEX = Regexp.union *INVALID_CHARS
@@ -29,14 +29,24 @@ class Sunflower
29
29
  File.join(ENV['HOME'], 'sunflower-userdata')
30
30
  end
31
31
 
32
- # Options for this Sunflower.
33
- attr_accessor :summary, :always_do_code_cleanup
34
- attr_accessor :cookie, :headers, :wikiURL
32
+ # Summary used when saving edits with this Sunflower.
33
+ attr_accessor :summary
34
+ # Whether to run #code_cleanup when calling #save.
35
+ attr_accessor :always_do_code_cleanup
36
+ # The URL this Sunflower works on, as passed to #initialize.
37
+ attr_reader :wikiURL
38
+ # Siteinfo, as returned by API call.
39
+ attr_accessor :siteinfo
35
40
 
41
+ # Whether this user (if logged in) has bot rights.
36
42
  def is_bot?; @is_bot; end
37
43
 
38
- attr_writer :warnings, :log
44
+ # Whether to output warning messages (using Kernel#warn). Defaults to true.
45
+ attr_writer :warnings
39
46
  def warnings?; @warnings; end
47
+
48
+ # Whether to output log messages (to a file named log.txt in current directory). Defaults to false.
49
+ attr_writer :log
40
50
  def log?; @log; end
41
51
 
42
52
  # Initialize a new Sunflower working on a wiki with given URL, for ex. "pl.wikipedia.org".
@@ -62,6 +72,32 @@ class Sunflower
62
72
  @wikiURL=url
63
73
 
64
74
  @loggedin=false
75
+
76
+ siprop = 'general|namespaces|namespacealiases|specialpagealiases|magicwords|interwikimap|dbrepllag|statistics|usergroups|extensions|fileextensions|rightsinfo|languages|skins|extensiontags|functionhooks|showhooks|variables'
77
+ @siteinfo = self.API(action: 'query', meta: 'siteinfo', siprop: siprop)['query']
78
+
79
+ _build_ns_map
80
+ end
81
+
82
+ # Private. Massages data from siteinfo to be used for recognizing namespaces.
83
+ def _build_ns_map
84
+ @namespace_to_id = {} # all keys lowercase
85
+ @namespace_id_to_canon = {}
86
+ @namespace_id_to_local = {}
87
+
88
+ @siteinfo['namespaces'].each_value do |h|
89
+ next if h['content']
90
+
91
+ id = h['id'].to_i
92
+ @namespace_id_to_canon[id] = h['canonical']
93
+ @namespace_id_to_local[id] = h['*']
94
+
95
+ @namespace_to_id[ h['canonical'].downcase ] = id
96
+ @namespace_to_id[ h['*'].downcase ] = id
97
+ end
98
+ @siteinfo['namespacealiases'].each do |h|
99
+ @namespace_to_id[ h['*'].downcase ] = h['id'].to_i
100
+ end
65
101
  end
66
102
 
67
103
  # Call the API. Returns a hash of JSON response. Request can be a HTTP request string or a hash.
@@ -69,7 +105,7 @@ class Sunflower
69
105
  if request.is_a? String
70
106
  request += '&format=json'
71
107
  elsif request.is_a? Hash
72
- request.merge!({format:'json'})
108
+ request = request.merge({format:'json'})
73
109
  end
74
110
 
75
111
  resp = RestClient.post(
@@ -145,33 +181,88 @@ class Sunflower
145
181
  return self
146
182
  end
147
183
 
148
- def log t
149
- File.open('log.txt','a'){|f| f.puts t} if @log
184
+ # Log message to a file named log.txt in current directory, if logging is enabled. See #log= / #log?.
185
+ def log message
186
+ File.open('log.txt','a'){|f| f.puts message} if @log
187
+ end
188
+
189
+ # Cleans up underscores, percent-encoding and title-casing in title (with optional anchor).
190
+ def cleanup_title title
191
+ name, anchor = title.split '#', 2
192
+
193
+ # CGI.unescape also changes pluses to spaces; code borrowed from there
194
+ unescape = lambda{|a| a.gsub(/((?:%[0-9a-fA-F]{2})+)/){ [$1.delete('%')].pack('H*') } }
195
+
196
+ ns = nil
197
+ name = unescape.call(name).gsub(/[ _]+/, ' ').strip
198
+ anchor = unescape.call(anchor.gsub(/\.([0-9a-fA-F]{2})/, '%\1')).gsub(/[ _]+/, ' ').strip if anchor
199
+
200
+ # FIXME unicode? downcase, upcase
201
+
202
+ if name.include? ':'
203
+ maybe_ns, part_name = name.split ':', 2
204
+ if ns_id = @namespace_to_id[maybe_ns.strip.downcase]
205
+ ns, name = @namespace_id_to_local[ns_id], part_name.strip
206
+ end
207
+ end
208
+
209
+ name[0] = name[0].upcase if @siteinfo["general"]["case"] == "first-letter"
210
+
211
+ return [ns ? "#{ns}:" : nil, name, anchor ? "##{anchor}" : nil].join ''
212
+ end
213
+
214
+ # Returns the localized namespace name for ns, which may be a namespace number, a canonical name, or any namespace alias.
215
+ #
216
+ # Returns nil if passed an invalid namespace.
217
+ def ns_local_for ns
218
+ case ns
219
+ when Numeric
220
+ @namespace_id_to_local[ns.to_i]
221
+ when String
222
+ @namespace_id_to_local[ @namespace_to_id[cleanup_title(ns).downcase] ]
223
+ end
224
+ end
225
+
226
+ # Like #ns_local_for, but returns canonical (English) name.
227
+ def ns_canon_for ns
228
+ case ns
229
+ when Numeric
230
+ @namespace_id_to_canon[ns.to_i]
231
+ when String
232
+ @namespace_id_to_canon[ @namespace_to_id[cleanup_title(ns).downcase] ]
233
+ end
234
+ end
235
+
236
+ # Returns a regular expression that will match the given namespace. Input rules are the same as for #ns_local_for.
237
+ #
238
+ # Does NOT handle percent-encoding and underscores. Use #cleanup_title to canonicalize the namespace first.
239
+ def ns_regex_for ns
240
+ id = ns.is_a?(Numeric) ? ns.to_i : @namespace_to_id[cleanup_title(ns).downcase]
241
+ return nil if !id
242
+
243
+ /#{@namespace_to_id.to_a.select{|a| a[1] == id }.map{|a| Regexp.escape a[0] }.join '|' }/i
150
244
  end
151
245
  end
152
246
 
153
- # Class representng single Wiki page. To load specified page, use #new/#get/#load method.
154
- #
155
- # When calling Page.new, at first only the text will be loaded - attributes and edit token will be loaded when needed, or when you call #preload_attrs.
156
- #
157
- # If you are using multiple Sunflowers, you have to specify which wiki this page belongs to using second argument of function; you can pass whole URL (same as when creating new Sunflower) or just language code.
158
- #
159
- # To save page, use #save/#put method. Optional argument is new title page, if ommited, page is saved at old title. Summary can be passed as second parameter. If it's ommited, s.summary is used. If it's empty too, error is raised.
160
- #
161
- # To get Sunflower instance which this page belongs to, use #sunflower of #belongs_to.
247
+ # Class representing a single Wiki page. To load specified page, use #new. To save it back, use #save.
162
248
  class Page
249
+ # Characters which MediaWiki does not permit in page title.
163
250
  INVALID_CHARS = %w(# < > [ ] | { })
251
+ # Regex matching characters which MediaWiki does not permit in page title.
164
252
  INVALID_CHARS_REGEX = Regexp.union *INVALID_CHARS
165
253
 
254
+ # The current text of the page.
166
255
  attr_accessor :text
256
+ # The text of the page, as of when it was loaded.
167
257
  attr_reader :orig_text
168
258
 
259
+ # The Sunflower instance this page belongs to.
169
260
  attr_reader :sunflower
170
- alias :belongs_to :sunflower
171
261
 
172
262
  # this is only for RDoc. wrapped in "if false" to avoid warnings when running with ruby -w
173
263
  if false
174
- attr_reader :pageid, :ns, :title, :touched, :lastrevid, :counter, :length, :starttimestamp, :edittoken, :protection #prop=info
264
+ # Return value of given attribute, as returned by API call prop=info for this page. Lazy-loaded.
265
+ attr_reader :pageid, :ns, :title, :touched, :lastrevid, :counter, :length, :starttimestamp, :edittoken, :protection
175
266
  end
176
267
 
177
268
  # calling any of these accessors will fetch the data.
@@ -182,15 +273,15 @@ class Page
182
273
  end
183
274
  end
184
275
 
276
+ # Load the specified page. Only the text will be immediately loaded - attributes and edit token will be loaded when needed, or when you call #preload_attrs.
277
+ #
278
+ # If you are using multiple Sunflowers, you have to specify which wiki this page belongs to using the second argument of function; you can pass whole URL (same as when creating new Sunflower) or just the language code.
185
279
  def initialize title='', wiki=''
186
280
  raise SunflowerError, 'title invalid: '+title if title =~ INVALID_CHARS_REGEX
187
281
 
188
282
  @modulesExecd=[] #used by sunflower-commontasks.rb
189
283
  @summaryAppend=[] #used by sunflower-commontasks.rb
190
284
 
191
- @title=title
192
- wiki=wiki+'.wikipedia.org' if wiki.index('.')==nil && wiki!=''
193
-
194
285
  if wiki==''
195
286
  count=ObjectSpace.each_object(Sunflower){|o| @sunflower=o}
196
287
  raise SunflowerError, 'you must pass wiki name if using multiple Sunflowers at once!' if count>1
@@ -198,6 +289,9 @@ class Page
198
289
  ObjectSpace.each_object(Sunflower){|o| @sunflower=o if o.wikiURL==wiki}
199
290
  end
200
291
 
292
+ @title = self.sunflower.cleanup_title title
293
+ wiki = wiki+'.wikipedia.org' if wiki.index('.')==nil && wiki!=''
294
+
201
295
  if title==''
202
296
  @text=''
203
297
  @orig_text=''
@@ -207,6 +301,7 @@ class Page
207
301
  preload_text
208
302
  end
209
303
 
304
+ # Load the text of this page. Semi-private.
210
305
  def preload_text
211
306
  r = @sunflower.API('action=query&prop=revisions&rvprop=content&titles='+CGI.escape(@title))
212
307
  r = r['query']['pages'].first
@@ -223,6 +318,7 @@ class Page
223
318
  @preloaded_text = true
224
319
  end
225
320
 
321
+ # Load the metadata associated with this page. Semi-private.
226
322
  def preload_attrs
227
323
  r = @sunflower.API('action=query&prop=info&inprop=protection&intoken=edit&titles='+CGI.escape(@title))
228
324
  r = r['query']['pages'].first
@@ -233,6 +329,7 @@ class Page
233
329
  @preloaded_attrs = true
234
330
  end
235
331
 
332
+ # Save the current text of this page to file (which can be either a filename or an IO).
236
333
  def dump_to file
237
334
  if file.respond_to? :write #probably file or IO
238
335
  file.write @text
@@ -241,10 +338,18 @@ class Page
241
338
  end
242
339
  end
243
340
 
341
+ # Save the current text of this page to a file whose name is based on page title, with non-alphanumeric characters stripped.
244
342
  def dump
245
343
  self.dump_to @title.gsub(/[^a-zA-Z0-9\-]/,'_')+'.txt'
246
344
  end
247
345
 
346
+ # Save the modifications to this page, possibly under a different title. Default summary is this page's Sunflower's summary (see Sunflower#summary=). Default title is the current title.
347
+ #
348
+ # Will not perform API request if no changes were made.
349
+ #
350
+ # Will call #code_cleanup if Sunflower#always_do_code_cleanup is set.
351
+ #
352
+ # Returns the JSON result of API call or nil when API call was not made.
248
353
  def save title=@title, summary=nil
249
354
  preload_attrs unless @preloaded_attrs
250
355
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sunflower
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.4.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-01 00:00:00.000000000 Z
12
+ date: 2012-08-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -95,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
95
95
  version: '0'
96
96
  requirements: []
97
97
  rubyforge_project:
98
- rubygems_version: 1.8.23
98
+ rubygems_version: 1.8.24
99
99
  signing_key:
100
100
  specification_version: 3
101
101
  summary: Sunflower is a lightweight library to provide access to MediaWiki API from