sunflower 0.4.5 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,255 @@
1
+ # coding: utf-8
2
+
3
+ # Class representing a list of articles. Inherits from Array.
4
+ class Sunflower::List < Array
5
+ # Create a new article list and fill it with items.
6
+ #
7
+ # Sunflower may be nil; this will, however, make most methods unavailable.
8
+ #
9
+ # This is in fact a wrapper for various list generator methods,
10
+ # each private, named with the format of "list_<type>",
11
+ # which accept the key and opts arguments and return arrays.
12
+ # You can use this behavior to add your own list types.
13
+ #
14
+ # You should probably use Sunflower#make_list instead of calling this directly.
15
+ def initialize sunflower, type, key, opts={}
16
+ @sunflower = sunflower
17
+
18
+ meth = :"list_#{type}"
19
+ if self.respond_to? meth, true
20
+ super(self.send meth, key, opts)
21
+ else
22
+ raise Sunflower::Error, "no such list type available: #{type}"
23
+ end
24
+ end
25
+
26
+ # Construct new list from an array.
27
+ def self.from_ary ary, sunflower=nil
28
+ Sunflower::List.new sunflower, 'pages', ary
29
+ end
30
+
31
+
32
+ # Converts self to an array of Sunflower::Page objects.
33
+ #
34
+ # Use #pages_preloaded to preload the text of all pages at once, instead of via separate requests.
35
+ def pages
36
+ Array.new self.map{|t| Sunflower::Page.new t, @sunflower }
37
+ end
38
+
39
+ # Converts self to an array of Sunflower::Page objects,
40
+ # then preloads the text in all of them using as few requests as possible.
41
+ # (API limit is at most 500 pages/req for bots, 50 for other users.)
42
+ #
43
+ # If any title is invalid, Sunflower::Error will be raised.
44
+ #
45
+ # If any title is uncanonicalizable by Sunflower#cleanup_title,
46
+ # it will not blow up or return incorrect results; however, text of some other
47
+ # pages may be missing (it will be lazy-loaded when requested, as usual).
48
+ def pages_preloaded
49
+ pgs = self.pages
50
+ at_once = @sunflower.is_bot? ? 500 : 50
51
+
52
+ # this is different from self; page titles are guaranteed to be canonicalized
53
+ titles = pgs.map{|a| a.title }
54
+
55
+ titles.each_slice(at_once).with_index do |slice, slice_no|
56
+ res = @sunflower.API('action=query&prop=revisions&rvprop=content&titles='+CGI.escape(slice.join '|'))
57
+ res['query']['pages'].values.each_with_index do |h, i|
58
+ page = pgs[slice_no*at_once + i]
59
+
60
+ if h['title'] and h['title'] == page.title
61
+ if h['missing']
62
+ page.text = ''
63
+ elsif h['invalid']
64
+ raise Sunflower::Error, 'title invalid: '+page.title
65
+ else
66
+ page.text = h['revisions'][0]['*']
67
+ end
68
+
69
+ page.preloaded_text = true
70
+ end
71
+ end
72
+ end
73
+
74
+ return pgs
75
+ end
76
+
77
+
78
+ private
79
+ # Can be used to create a new list from array. Used internally in .from_ary.
80
+ def list_pages ary, opts={} # :doc:
81
+ ary
82
+ end
83
+
84
+ # Create from plaintext list, each title in separate line.
85
+ def list_plaintext text, opts={} # :doc:
86
+ text.split(/\r?\n/)
87
+ end
88
+
89
+ # Create from file. Supports BOM in UTF-8 files.
90
+ def list_file filename, opts={} # :doc:
91
+ lines = File.readlines(filename)
92
+ lines[0].sub!(/^\357\273\277/, '') # BOM
93
+ lines.each{|ln| ln.chomp! }
94
+ lines.pop while lines.last == ''
95
+ lines
96
+ end
97
+
98
+ # Categories on given page.
99
+ def list_categories_on page, opts={} # :doc:
100
+ r = @sunflower.API_continued('action=query&prop=categories&cllimit=max&titles='+CGI.escape(page), 'pages', 'clcontinue')
101
+ r['query']['pages'].values.first['categories'].map{|v| v['title']}
102
+ end
103
+
104
+ # Category members.
105
+ def list_category cat, opts={} # :doc:
106
+ r = @sunflower.API_continued('action=query&list=categorymembers&cmprop=title&cmlimit=max&cmtitle='+CGI.escape(cat), 'categorymembers', 'cmcontinue')
107
+ r['query']['categorymembers'].map{|v| v['title']}
108
+ end
109
+
110
+ # Category members. Scans categories recursively.
111
+ def list_category_recursive cat, opts={} # :doc:
112
+ list = [] # list of articles
113
+ processed = []
114
+ cats_to_process = [cat] # list of categories to be processes
115
+ while !cats_to_process.empty?
116
+ now = cats_to_process.shift
117
+ processed << now # make sure we do not get stuck in infinite loop
118
+
119
+ list2 = list_category now # get contents of first cat in list
120
+
121
+ # find categories and queue them
122
+ cats_to_process += list2
123
+ .select{|el| el =~ /^#{@sunflower.ns_regex_for 'category'}:/}
124
+ .reject{|el| processed.include? el or cats_to_process.include? el}
125
+
126
+ list += list2 # add articles to main list
127
+ end
128
+ list.uniq!
129
+ return list
130
+ end
131
+
132
+ # Links on given page.
133
+ def list_links_on page, opts={} # :doc:
134
+ r = @sunflower.API_continued('action=query&prop=links&pllimit=max&titles='+CGI.escape(page), 'pages', 'plcontinue')
135
+ r['query']['pages'].values.first['links'].map{|v| v['title']}
136
+ end
137
+
138
+ # Templates used on given page.
139
+ def list_templates_on page, opts={} # :doc:
140
+ r = @sunflower.API_continued('action=query&prop=templates&tllimit=max&titles='+CGI.escape(page), 'pages', 'tlcontinue')
141
+ r['query']['pages'].values.first['templates'].map{|v| v['title']}
142
+ end
143
+
144
+ # Pages edited by given user.
145
+ def list_contribs user, opts={} # :doc:
146
+ r = @sunflower.API_continued('action=query&list=usercontribs&uclimit=max&ucprop=title&ucuser='+CGI.escape(user), 'usercontribs', 'uccontinue')
147
+ r['query']['usercontribs'].map{|v| v['title']}
148
+ end
149
+
150
+ # Pages which link to given page.
151
+ def list_whatlinkshere page, opts={} # :doc:
152
+ r = @sunflower.API_continued('action=query&list=backlinks&bllimit=max&bltitle='+CGI.escape(page), 'backlinks', 'blcontinue')
153
+ r['query']['backlinks'].map{|v| v['title']}
154
+ end
155
+
156
+ # Pages which embed (transclude) given page.
157
+ def list_whatembeds page, opts={} # :doc:
158
+ r = @sunflower.API_continued('action=query&list=embeddedin&eilimit=max&eititle='+CGI.escape(page), 'embeddedin', 'eicontinue')
159
+ r['query']['embeddedin'].map{|v| v['title']}
160
+ end
161
+
162
+ # Pages which use the given image.
163
+ def list_image_usage image, opts={} # :doc:
164
+ r = @sunflower.API_continued('action=query&list=imageusage&iulimit=max&iutitle='+CGI.escape(image), 'imageusage', 'iucontinue')
165
+ r['query']['imageusage'].map{|v| v['title']}
166
+ end
167
+
168
+ # Search results for given text.
169
+ #
170
+ # Options:
171
+ # * ns: namespaces to search in, as pipe-separated numbers (or single number). Default: 0 (main).
172
+ def list_search text, opts={} # :doc:
173
+ opts = {ns: 0}.merge opts
174
+ r = @sunflower.API_continued('action=query&list=search&srwhat=text&srlimit=max&srnamespace='+CGI.escape(opts[:ns].to_s)+'&srsearch='+CGI.escape(text), 'search', 'srcontinue')
175
+ r['query']['search'].map{|v| v['title']}
176
+ end
177
+
178
+ # Search results for given text. Only searches in page titles. See also #list_grep.
179
+ #
180
+ # Options:
181
+ # * ns: namespaces to search in, as pipe-separated numbers (or single number). Default: 0 (main).
182
+ def list_search_titles key, opts={} # :doc:
183
+ opts = {ns: 0}.merge opts
184
+ r = @sunflower.API_continued('action=query&list=search&srwhat=title&srlimit=max&srnamespace='+CGI.escape(opts[:ns].to_s)+'&srsearch='+CGI.escape(key), 'search', 'srcontinue')
185
+ r['query']['search'].map{|v| v['title']}
186
+ end
187
+
188
+ # `count` random pages.
189
+ def list_random count, opts={} # :doc:
190
+ r = @sunflower.API_continued('action=query&list=random&rnnamespace=0&rnlimit='+CGI.escape(count.to_s), 'random', 'rncontinue')
191
+ r['query']['random'].map{|v| v['title']}
192
+ end
193
+
194
+ # External link search. Format like on Special:LinkSearch.
195
+ def list_linksearch url, opts={} # :doc:
196
+ r = @sunflower.API_continued('action=query&list=exturlusage&eulimit=max&euprop=title&euquery='+CGI.escape(url), 'exturlusage', 'eucontinue')
197
+ r['query']['exturlusage'].map{|v| v['title']}
198
+ end
199
+
200
+ # Pages whose titles match given regex. Uses nikola's grep tool: http://toolserver.org/~nikola/grep.php
201
+ #
202
+ # Options:
203
+ # * ns: namespace to search in, as a number (default: 0, main)
204
+ # * redirs: whether to include redirects in search results (default: true)
205
+ def list_grep regex, opts={} # :doc:
206
+ opts = {ns: 0, redirs: true}.merge opts
207
+ lang, wiki = @sunflower.wikiURL.split '.', 2
208
+
209
+ list = []
210
+
211
+ p = RestClient.get("http://toolserver.org/~nikola/grep.php?pattern=#{CGI.escape regex}&lang=#{CGI.escape lang}&wiki=#{CGI.escape wiki}&ns=#{CGI.escape opts[:ns].to_s}#{opts[:redirs] ? '&redirects=on' : ''}")
212
+ p.scan(/<tr><td><a href="http:\/\/#{@sunflower.wikiURL}\/wiki\/([^#<>\[\]\|\{\}]+?)(?:\?redirect=no|)">/){
213
+ list << @sunflower.cleanup_title($1)
214
+ }
215
+ return list
216
+ end
217
+ end
218
+
219
+ class Sunflower
220
+ # Makes a list of articles. Returns a Sunflower::List (an Array subclass) of titles.
221
+ def make_list type, key, opts={}
222
+ begin
223
+ return Sunflower::List.new self, type, key, opts
224
+ rescue Sunflower::Error => e
225
+ if e.message == "no such list type available: #{type}"
226
+ backwards_compat = {
227
+ :categorieson => :categories_on,
228
+ :categoryrecursive => :category_recursive,
229
+ :categoryr => :category_recursive,
230
+ :linkson => :links_on,
231
+ :templateson => :templates_on,
232
+ :transclusionson => :templates_on,
233
+ :usercontribs => :contribs,
234
+ :whatlinksto => :whatlinkshere,
235
+ :whattranscludes => :whatembeds,
236
+ :imageusage => :image_usage,
237
+ :image => :image_usage,
238
+ :searchtitles => :search_titles,
239
+ :external => :linksearch,
240
+ :regex => :grep,
241
+ :regexp => :grep,
242
+ }
243
+
244
+ if type2 = backwards_compat[type.to_s.downcase.gsub(/[^a-z]/, '').to_sym]
245
+ warn "warning: #{type} has been renamed to #{type2}, old name will be removed in v0.6"
246
+ Sunflower::List.new self, type2, key, opts
247
+ else
248
+ raise e
249
+ end
250
+ else
251
+ raise e
252
+ end
253
+ end
254
+ end
255
+ end
data/lib/sunflower.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # coding: utf-8
2
2
  require 'sunflower/core'
3
3
  require 'sunflower/commontasks'
4
- require 'sunflower/listmaker'
4
+ require 'sunflower/list'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sunflower
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: '0.5'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-05 00:00:00.000000000 Z
12
+ date: 2012-08-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -55,26 +55,17 @@ files:
55
55
  - LICENSE
56
56
  - bin/sunflower-setup
57
57
  - example-bot.rb
58
- - use-easy-bot.rb
59
58
  - lib/sunflower.rb
60
59
  - lib/sunflower/core.rb
61
60
  - lib/sunflower/commontasks.rb
62
- - lib/sunflower/listmaker.rb
61
+ - lib/sunflower/list.rb
63
62
  - scripts/fix-bold-in-headers.rb
64
- - scripts/fix-multiple-same-refs.rb
65
63
  - scripts/fix-langs.rb
66
64
  - scripts/lekkoatl-portal.rb
67
- - scripts/ZDBOT.rb
68
- - scripts/aktualizacjapilkarzy.rb
69
- - scripts/changeimage.rb
70
- - scripts/insight.rb
71
- - scripts/make-id2team-list.rb
72
- - scripts/author-list.rb
73
65
  - scripts/fix-unicode-control-chars.rb
74
66
  - scripts/fix-double-pipes.rb
75
67
  - scripts/fix-some-entities.rb
76
68
  - scripts/recat.rb
77
- - scripts/wanted.rb
78
69
  homepage: http://github.com/MatmaRex/Sunflower
79
70
  licenses: []
80
71
  post_install_message:
@@ -1,160 +0,0 @@
1
- # coding: utf-8
2
- class Sunflower
3
- # Makes a list of articles. Returns array of titles.
4
- def make_list type, *parameters
5
- type=type.downcase.gsub(/[^a-z]/, '')
6
- first=parameters[0]
7
- firstE=CGI.escape first.to_s
8
-
9
- case type
10
- when 'file'
11
- f=File.open first
12
- list=f.read.sub(/\357\273\277/,'').strip.split(/\r?\n/)
13
- f.close
14
-
15
- when 'page', 'pages'
16
- list=parameters
17
-
18
- when 'input'
19
- puts 'Insert titles of articles to edit:'
20
- puts 'Press [Enter] without inputting any text to finish.'
21
- puts 'Press [Ctrl]+[C] to kill bot.'
22
- list=[]
23
- while true
24
- input=gets.strip
25
- break if input==''
26
-
27
- list<<input
28
- end
29
-
30
- when 'categorieson'
31
- r=self.API('action=query&prop=categories&cllimit=500&titles='+firstE)
32
- list=r['query']['pages'].first['categories'].map{|v| v['title']}
33
-
34
- when 'category'
35
- r=self.API('action=query&list=categorymembers&cmprop=title&cmlimit=5000&cmtitle='+firstE)
36
- list=r['query']['categorymembers'].map{|v| v['title']}
37
-
38
- when 'categoryr', 'categoryrecursive'
39
- list = [] # list of articles
40
- processed = []
41
- cats_to_process = [first] # list of categories to be processes
42
- while !cats_to_process.empty?
43
- now = cats_to_process.shift
44
- processed << now # make sure we do not get stuck in infinite loop
45
-
46
- list2 = self.make_list 'category', now # get contents of first cat in list
47
-
48
- # find categories and queue them
49
- cats_to_process += list2
50
- .select{|el| el=~/\AKategoria:/}
51
- .reject{|el| processed.include? el or cats_to_process.include? el}
52
-
53
- list += list2 # add articles to main list
54
- end
55
- list.uniq!
56
-
57
- when 'linkson'
58
- r=self.API('action=query&prop=links&pllimit=5000&titles='+firstE)
59
- list=r['query']['pages'].first['links'].map{|v| v['title']}
60
-
61
- when 'transclusionson', 'templateson'
62
- r=self.API('action=query&prop=templates&tllimit=5000&titles='+firstE)
63
- list=r['query']['pages'].first['templates'].map{|v| v['title']}
64
-
65
- when 'usercontribs', 'contribs'
66
- r=self.API('action=query&list=usercontribs&uclimit=5000&ucprop=title&ucuser='+firstE)
67
- list=r['query']['usercontribs'].map{|v| v['title']}
68
-
69
- when 'whatlinksto', 'whatlinkshere'
70
- r=self.API('action=query&list=backlinks&bllimit=5000&bltitle='+firstE)
71
- list=r['query']['backlinks'].map{|v| v['title']}
72
-
73
- when 'whattranscludes', 'whatembeds'
74
- r=self.API('action=query&list=embeddedin&eilimit=5000&eititle='+firstE)
75
- list=r['query']['embeddedin'].map{|v| v['title']}
76
-
77
- when 'image', 'imageusage'
78
- r=self.API('action=query&list=imageusage&iulimit=5000&iutitle='+firstE)
79
- list=r['query']['imageusage'].map{|v| v['title']}
80
-
81
- when 'search'
82
- r=self.API('action=query&list=search&srwhat=text&srlimit=5000&srnamespace='+(parameters[1]=='allns' ? CGI.escape('0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|100|101|102|103') : '0')+'&srsearch='+firstE)
83
- list=r['query']['search'].map{|v| v['title']}
84
-
85
- when 'searchtitles'
86
- r=self.API('action=query&list=search&srwhat=title&srlimit=5000&srnamespace='+(parameters[1]=='allns' ? CGI.escape('0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|100|101|102|103') : '0')+'&srsearch='+firstE)
87
- list=r['query']['search'].map{|v| v['title']}
88
-
89
- when 'random'
90
- r=self.API('action=query&list=random&rnnamespace=0&rnlimit='+firstE)
91
- list=r['query']['random'].map{|v| v['title']}
92
-
93
- when 'external', 'linksearch'
94
- r=self.API('action=query&euprop=title&list=exturlusage&eulimit=5000&euquery='+firstE)
95
- list=r['query']['exturlusage'].map{|v| v['title']}
96
-
97
- when 'google'
98
- limit=[parameters[1].to_i,999].min
99
- from=0
100
- list=[]
101
-
102
- while from<limit
103
- p=HTTP.get(URI.parse("http://www.google.pl/custom?q=kot&start=#{from}&sitesearch=#{@wikiURL}"))
104
- p.scan(/<div class=g><h2 class=r><a href="http:\/\/#{@wikiURL}\/wiki\/([^#<>\[\]\|\{\}]+?)" class=l>/){
105
- list<<CGI.unescape($1).gsub('_',' ')
106
- }
107
-
108
- from+=10
109
- end
110
-
111
- when 'grep', 'regex', 'regexp'
112
- split=@wikiURL.split('.')
113
- ns=(parameters[1] ? parameters[1].to_s.gsub(/\D/,'') : '0')
114
- redirs=(parameters[2] ? '&redirects=on' : '')
115
- list=[]
116
-
117
- p=HTTP.get(URI.parse("http://toolserver.org/~nikola/grep.php?pattern=#{firstE}&lang=#{split[0]}&wiki=#{split[1]}&ns=#{ns}#{redirs}"))
118
- p.scan(/<tr><td><a href="http:\/\/#{@wikiURL}\/wiki\/([^#<>\[\]\|\{\}]+?)(?:\?redirect=no|)">/){
119
- list<<CGI.unescape($1).gsub('_',' ')
120
- }
121
- end
122
-
123
- return list
124
- end
125
- end
126
-
127
- if $0==__FILE__
128
- puts 'What kind of list do you want to create?'
129
- if !(t=ARGV.shift)
130
- t=gets
131
- else
132
- t=t.strip
133
- puts t
134
- end
135
- puts ''
136
-
137
- puts 'Supply arguments to pass to listmaker:'
138
- puts '(press [Enter] without writing anything to finish)'
139
- arg=[]
140
- ARGV.each do |i|
141
- arg<<i.strip
142
- puts i.strip
143
- end
144
- while (a=gets.strip)!=''
145
- arg<<a
146
- end
147
-
148
- puts 'Making list, wait patiently...'
149
-
150
- s=Sunflower.new
151
- s.login
152
-
153
- l=s.make_list(t, *arg)
154
- l.sort!
155
- f=File.open('list.txt','w')
156
- f.write(l.join("\n"))
157
- f.close
158
-
159
- puts 'Done! List saved to "list.txt".'
160
- end
data/scripts/ZDBOT.rb DELETED
@@ -1,62 +0,0 @@
1
- require 'sunflower-core.rb'
2
- require 'sunflower-commontasks.rb'
3
- s=Sunflower.new
4
- s.login
5
-
6
- $summary='archiwizacja zadań'
7
-
8
- pp=Page.get('Wikipedia:Zadania dla botów')
9
- tasks=pp.text
10
-
11
- tasksDone=[]
12
- tasksError=[]
13
- tasksOld=[]
14
-
15
- tasks=tasks.gsub(/\n==\s*(.+?)\s*==\s*\{\{\/Status\|([^}]+)\}\}([\s\S]+?)(?=\r?\n==|\s*\Z)/) do
16
- title=$1.strip
17
- status=$2.strip
18
- text=$3.strip
19
-
20
- bval=''
21
-
22
- if (['wykonane','zrobione','błąd','błędne','stare'].index(status)==nil)
23
- bval=$&
24
- elsif (status=='wykonane' || status=='zrobione')
25
- tasksDone<<"== "+title+" ==\n{{/Status|"+status+"}}\n"+text
26
- elsif (status=='błąd' || status=='błędne')
27
- tasksError<<"== "+title+" ==\n{{/Status|"+status+"}}\n"+text
28
- elsif (status=='stare')
29
- tasksOld<<"== "+title+" ==\n{{/Status|"+status+"}}\n"+text
30
- end
31
-
32
- bval
33
- end
34
-
35
- puts 'Data loaded. Saving...'
36
-
37
- p=Page.get('Wikipedia:Zadania_dla_botów/Archiwum/błędne')
38
- p.append tasksError.join("\n\n") unless tasksError.empty?
39
- p.save unless tasksError.empty?
40
- puts 'Error - saved.'
41
-
42
- p=Page.get('Wikipedia:Zadania_dla_botów/Archiwum/wykonane')
43
- p.append tasksDone.join("\n\n") unless tasksDone.empty?
44
- p.save unless tasksDone.empty?
45
- puts 'Done - saved.'
46
-
47
- p=Page.get('Wikipedia:Zadania_dla_botów/Archiwum/stare')
48
- p.append tasksOld.join("\n\n") unless tasksOld.empty?
49
- p.save unless tasksOld.empty?
50
- puts 'Old - saved.'
51
-
52
- pp.text=tasks
53
- pp.save
54
- puts 'Main - saved.'
55
-
56
- # File.open('ZDBOT_main.txt','w').write(tasks)
57
- # File.open('ZDBOT_done.txt','w').write(tasksDone.join("\n\n")) unless tasksDone.empty?
58
- # File.open('ZDBOT_error.txt','w').write(tasksError.join("\n\n")) unless tasksError.empty?
59
- # File.open('ZDBOT_old.txt','w').write(tasksOld.join("\n\n")) unless tasksOld.empty?
60
-
61
- puts "Stats: done: #{tasksDone.length}; error: #{tasksError.length}; old: #{tasksOld.length}"
62
- gets