sunflower 0.4.5 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,255 @@
1
+ # coding: utf-8
2
+
3
+ # Class representing a list of articles. Inherits from Array.
4
+ class Sunflower::List < Array
5
+ # Create a new article list and fill it with items.
6
+ #
7
+ # Sunflower may be nil; this will, however, make most methods unavailable.
8
+ #
9
+ # This is in fact a wrapper for various list generator methods,
10
+ # each private, named with the format of "list_<type>",
11
+ # which accept the key and opts arguments and return arrays.
12
+ # You can use this behavior to create your own ones.
13
+ #
14
+ # You should probably use Sunflower#make_list instead of calling this directly.
15
+ def initialize sunflower, type, key, opts={}
16
+ @sunflower = sunflower
17
+
18
+ meth = :"list_#{type}"
19
+ if self.respond_to? meth, true
20
+ super(self.send meth, key, opts)
21
+ else
22
+ raise Sunflower::Error, "no such list type available: #{type}"
23
+ end
24
+ end
25
+
26
+ # Construct new list from an array.
27
+ def self.from_ary ary, sunflower=nil
28
+ Sunflower::List.new sunflower, 'pages', ary
29
+ end
30
+
31
+
32
+ # Converts self to an array of Sunflower::Page objects.
33
+ #
34
+ # Use #pages_preloaded to preload the text of all pages at once, instead of via separate requests.
35
+ def pages
36
+ Array.new self.map{|t| Sunflower::Page.new t, @sunflower }
37
+ end
38
+
39
+ # Converts self to an array of Sunflower::Page objects,
40
+ # then preloads the text in all of them using as few requests as possible.
41
+ # (API limit is at most 500 pages/req for bots, 50 for other users.)
42
+ #
43
+ # Results are matched to pages by title, so duplicate titles and any result order are handled.
44
+ # If the API flags a returned title as invalid, Sunflower::Error will be raised.
45
+ # If any title is uncanonicalizable by Sunflower#cleanup_title, it will not blow up;
46
+ # its text is simply not preloaded (it will be lazy-loaded when requested, as usual).
47
+ # Returns the array of pages.
48
+ def pages_preloaded
49
+ pgs = self.pages
50
+ at_once = @sunflower.is_bot? ? 500 : 50
51
+
52
+ # Group pages by title (this is different from self; page titles are guaranteed to be
53
+ # canonicalized). API results are not assumed to come back in request order.
54
+ by_title = pgs.group_by{|a| a.title }
55
+
56
+ by_title.keys.each_slice(at_once) do |slice|
57
+ res = @sunflower.API('action=query&prop=revisions&rvprop=content&titles='+CGI.escape(slice.join '|'))
58
+ res['query']['pages'].values.each do |h|
59
+ # results whose title we did not ask for are skipped; such pages stay lazy-loaded
60
+ (by_title[h['title']] || []).each do |page|
61
+ if h['missing']
62
+ page.text = ''
63
+ elsif h['invalid']
64
+ raise Sunflower::Error, 'title invalid: '+page.title
65
+ else
66
+ page.text = h['revisions'][0]['*']
67
+ end
68
+
69
+ page.preloaded_text = true
70
+ end
71
+ end
72
+ end
73
+
74
+ return pgs
75
+ end
76
+
77
+
78
+ private
79
+ # Can be used to create a new list from array. Used internally in .from_ary.
80
+ def list_pages ary, opts={} # :doc:
81
+ ary
82
+ end
83
+
84
+ # Create from plaintext list, each title in separate line.
85
+ def list_plaintext text, opts={} # :doc:
86
+ text.split(/\r?\n/)
87
+ end
88
+
89
+ # Create from file. Supports BOM in UTF-8 files.
90
+ def list_file filename, opts={} # :doc:
91
+ lines = File.readlines(filename)
92
+ lines[0].sub!(/^\357\273\277/, '') # BOM
93
+ lines.each{|ln| ln.chomp! }
94
+ lines.pop while lines.last == ''
95
+ lines
96
+ end
97
+
98
+ # Categories on given page.
99
+ def list_categories_on page, opts={} # :doc:
100
+ r = @sunflower.API_continued('action=query&prop=categories&cllimit=max&titles='+CGI.escape(page), 'pages', 'clcontinue')
101
+ r['query']['pages'].values.first['categories'].map{|v| v['title']}
102
+ end
103
+
104
+ # Category members.
105
+ def list_category cat, opts={} # :doc:
106
+ r = @sunflower.API_continued('action=query&list=categorymembers&cmprop=title&cmlimit=max&cmtitle='+CGI.escape(cat), 'categorymembers', 'cmcontinue')
107
+ r['query']['categorymembers'].map{|v| v['title']}
108
+ end
109
+
110
+ # Category members. Scans categories recursively.
111
+ def list_category_recursive cat, opts={} # :doc:
112
+ list = [] # list of articles
113
+ processed = []
114
+ cats_to_process = [cat] # list of categories to be processes
115
+ while !cats_to_process.empty?
116
+ now = cats_to_process.shift
117
+ processed << now # make sure we do not get stuck in infinite loop
118
+
119
+ list2 = list_category now # get contents of first cat in list
120
+
121
+ # find categories and queue them
122
+ cats_to_process += list2
123
+ .select{|el| el =~ /^#{@sunflower.ns_regex_for 'category'}:/}
124
+ .reject{|el| processed.include? el or cats_to_process.include? el}
125
+
126
+ list += list2 # add articles to main list
127
+ end
128
+ list.uniq!
129
+ return list
130
+ end
131
+
132
+ # Links on given page.
133
+ def list_links_on page, opts={} # :doc:
134
+ r = @sunflower.API_continued('action=query&prop=links&pllimit=max&titles='+CGI.escape(page), 'pages', 'plcontinue')
135
+ r['query']['pages'].values.first['links'].map{|v| v['title']}
136
+ end
137
+
138
+ # Templates used on given page.
139
+ def list_templates_on page, opts={} # :doc:
140
+ r = @sunflower.API_continued('action=query&prop=templates&tllimit=max&titles='+CGI.escape(page), 'pages', 'tlcontinue')
141
+ r['query']['pages'].values.first['templates'].map{|v| v['title']}
142
+ end
143
+
144
+ # Pages edited by given user.
145
+ def list_contribs user, opts={} # :doc:
146
+ r = @sunflower.API_continued('action=query&list=usercontribs&uclimit=max&ucprop=title&ucuser='+CGI.escape(user), 'usercontribs', 'uccontinue')
147
+ r['query']['usercontribs'].map{|v| v['title']}
148
+ end
149
+
150
+ # Pages which link to given page.
151
+ def list_whatlinkshere page, opts={} # :doc:
152
+ r = @sunflower.API_continued('action=query&list=backlinks&bllimit=max&bltitle='+CGI.escape(page), 'backlinks', 'blcontinue')
153
+ r['query']['backlinks'].map{|v| v['title']}
154
+ end
155
+
156
+ # Pages which embed (transclude) given page.
157
+ def list_whatembeds page, opts={} # :doc:
158
+ r = @sunflower.API_continued('action=query&list=embeddedin&eilimit=max&eititle='+CGI.escape(page), 'embeddedin', 'eicontinue')
159
+ r['query']['embeddedin'].map{|v| v['title']}
160
+ end
161
+
162
+ # Pages which use given image.
163
+ def list_image_usage image, opts={} # :doc:
164
+ r = @sunflower.API_continued('action=query&list=imageusage&iulimit=max&iutitle='+CGI.escape(image), 'imageusage', 'iucontinue')
165
+ r['query']['imageusage'].map{|v| v['title']}
166
+ end
167
+
168
+ # Search results for given text.
169
+ #
170
+ # Options:
171
+ # * ns: namespaces to search in, as pipe-separated numbers (or single number). Default: 0 (main).
172
+ def list_search text, opts={} # :doc:
173
+ opts = {ns: 0}.merge opts
174
+ r = @sunflower.API_continued('action=query&list=search&srwhat=text&srlimit=max&srnamespace='+CGI.escape(opts[:ns].to_s)+'&srsearch='+CGI.escape(text), 'search', 'srcontinue')
175
+ r['query']['search'].map{|v| v['title']}
176
+ end
177
+
178
+ # Search results for given text. Only searches in page titles. See also #list_grep.
179
+ #
180
+ # Options:
181
+ # * ns: namespaces to search in, as pipe-separated numbers (or single number). Default: 0 (main).
182
+ def list_search_titles key, opts={} # :doc:
183
+ opts = {ns: 0}.merge opts
184
+ r = @sunflower.API_continued('action=query&list=search&srwhat=title&srlimit=max&srnamespace='+CGI.escape(opts[:ns].to_s)+'&srsearch='+CGI.escape(key), 'search', 'srcontinue')
185
+ r['query']['search'].map{|v| v['title']}
186
+ end
187
+
188
+ # `count` random pages.
189
+ def list_random count, opts={} # :doc:
190
+ r = @sunflower.API_continued('action=query&list=random&rnnamespace=0&rnlimit='+CGI.escape(count.to_s), 'random', 'rncontinue')
191
+ r['query']['random'].map{|v| v['title']}
192
+ end
193
+
194
+ # External link search. Format like on Special:LinkSearch.
195
+ def list_linksearch url, opts={} # :doc:
196
+ r = @sunflower.API_continued('action=query&list=exturlusage&eulimit=max&euprop=title&euquery='+CGI.escape(url), 'exturlusage', 'eucontinue')
197
+ r['query']['exturlusage'].map{|v| v['title']}
198
+ end
199
+
200
+ # Pages whose titles match given regex. Uses nikola's grep tool: http://toolserver.org/~nikola/grep.php
201
+ #
202
+ # Options:
203
+ # * ns: namespace to search in, as a number (default: 0, main)
204
+ # * redirs: whether to include redirects in search results (default: true)
205
+ def list_grep regex, opts={} # :doc:
206
+ opts = {ns: 0, redirs: true}.merge opts
207
+ lang, wiki = @sunflower.wikiURL.split '.', 2
208
+
209
+ list = []
210
+
211
+ p = RestClient.get("http://toolserver.org/~nikola/grep.php?pattern=#{CGI.escape regex}&lang=#{CGI.escape lang}&wiki=#{CGI.escape wiki}&ns=#{CGI.escape opts[:ns].to_s}#{opts[:redirs] ? '&redirects=on' : ''}")
212
+ p.scan(/<tr><td><a href="http:\/\/#{@sunflower.wikiURL}\/wiki\/([^#<>\[\]\|\{\}]+?)(?:\?redirect=no|)">/){
213
+ list << @sunflower.cleanup_title($1)
214
+ }
215
+ return list
216
+ end
217
+ end
218
+
219
+ class Sunflower
220
+ # Makes a list of articles. Returns array of titles.
221
+ def make_list type, key, opts={}
222
+ begin
223
+ return Sunflower::List.new self, type, key, opts
224
+ rescue Sunflower::Error => e
225
+ if e.message == "no such list type available: #{type}"
226
+ backwards_compat = {
227
+ :categorieson => :categories_on,
228
+ :categoryrecursive => :category_recursive,
229
+ :categoryr => :category_recursive,
230
+ :linkson => :links_on,
231
+ :templateson => :templates_on,
232
+ :transclusionson => :templates_on,
233
+ :usercontribs => :contribs,
234
+ :whatlinksto => :whatlinkshere,
235
+ :whattranscludes => :whatembeds,
236
+ :imageusage => :image_usage,
237
+ :image => :image_usage,
238
+ :searchtitles => :search_titles,
239
+ :external => :linksearch,
240
+ :regex => :grep,
241
+ :regexp => :grep,
242
+ }
243
+
244
+ if type2 = backwards_compat[type.to_s.downcase.gsub(/[^a-z]/, '').to_sym]
245
+ warn "warning: #{type} has been renamed to #{type2}, old name will be removed in v0.6"
246
+ Sunflower::List.new self, type2, key, opts
247
+ else
248
+ raise e
249
+ end
250
+ else
251
+ raise e
252
+ end
253
+ end
254
+ end
255
+ end
data/lib/sunflower.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # coding: utf-8
2
2
  require 'sunflower/core'
3
3
  require 'sunflower/commontasks'
4
- require 'sunflower/listmaker'
4
+ require 'sunflower/list'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sunflower
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: '0.5'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-05 00:00:00.000000000 Z
12
+ date: 2012-08-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -55,26 +55,17 @@ files:
55
55
  - LICENSE
56
56
  - bin/sunflower-setup
57
57
  - example-bot.rb
58
- - use-easy-bot.rb
59
58
  - lib/sunflower.rb
60
59
  - lib/sunflower/core.rb
61
60
  - lib/sunflower/commontasks.rb
62
- - lib/sunflower/listmaker.rb
61
+ - lib/sunflower/list.rb
63
62
  - scripts/fix-bold-in-headers.rb
64
- - scripts/fix-multiple-same-refs.rb
65
63
  - scripts/fix-langs.rb
66
64
  - scripts/lekkoatl-portal.rb
67
- - scripts/ZDBOT.rb
68
- - scripts/aktualizacjapilkarzy.rb
69
- - scripts/changeimage.rb
70
- - scripts/insight.rb
71
- - scripts/make-id2team-list.rb
72
- - scripts/author-list.rb
73
65
  - scripts/fix-unicode-control-chars.rb
74
66
  - scripts/fix-double-pipes.rb
75
67
  - scripts/fix-some-entities.rb
76
68
  - scripts/recat.rb
77
- - scripts/wanted.rb
78
69
  homepage: http://github.com/MatmaRex/Sunflower
79
70
  licenses: []
80
71
  post_install_message:
@@ -1,160 +0,0 @@
1
- # coding: utf-8
2
- class Sunflower
3
- # Makes a list of articles. Returns array of titles.
4
- def make_list type, *parameters
5
- type=type.downcase.gsub(/[^a-z]/, '')
6
- first=parameters[0]
7
- firstE=CGI.escape first.to_s
8
-
9
- case type
10
- when 'file'
11
- f=File.open first
12
- list=f.read.sub(/\357\273\277/,'').strip.split(/\r?\n/)
13
- f.close
14
-
15
- when 'page', 'pages'
16
- list=parameters
17
-
18
- when 'input'
19
- puts 'Insert titles of articles to edit:'
20
- puts 'Press [Enter] without inputting any text to finish.'
21
- puts 'Press [Ctrl]+[C] to kill bot.'
22
- list=[]
23
- while true
24
- input=gets.strip
25
- break if input==''
26
-
27
- list<<input
28
- end
29
-
30
- when 'categorieson'
31
- r=self.API('action=query&prop=categories&cllimit=500&titles='+firstE)
32
- list=r['query']['pages'].first['categories'].map{|v| v['title']}
33
-
34
- when 'category'
35
- r=self.API('action=query&list=categorymembers&cmprop=title&cmlimit=5000&cmtitle='+firstE)
36
- list=r['query']['categorymembers'].map{|v| v['title']}
37
-
38
- when 'categoryr', 'categoryrecursive'
39
- list = [] # list of articles
40
- processed = []
41
- cats_to_process = [first] # list of categories to be processes
42
- while !cats_to_process.empty?
43
- now = cats_to_process.shift
44
- processed << now # make sure we do not get stuck in infinite loop
45
-
46
- list2 = self.make_list 'category', now # get contents of first cat in list
47
-
48
- # find categories and queue them
49
- cats_to_process += list2
50
- .select{|el| el=~/\AKategoria:/}
51
- .reject{|el| processed.include? el or cats_to_process.include? el}
52
-
53
- list += list2 # add articles to main list
54
- end
55
- list.uniq!
56
-
57
- when 'linkson'
58
- r=self.API('action=query&prop=links&pllimit=5000&titles='+firstE)
59
- list=r['query']['pages'].first['links'].map{|v| v['title']}
60
-
61
- when 'transclusionson', 'templateson'
62
- r=self.API('action=query&prop=templates&tllimit=5000&titles='+firstE)
63
- list=r['query']['pages'].first['templates'].map{|v| v['title']}
64
-
65
- when 'usercontribs', 'contribs'
66
- r=self.API('action=query&list=usercontribs&uclimit=5000&ucprop=title&ucuser='+firstE)
67
- list=r['query']['usercontribs'].map{|v| v['title']}
68
-
69
- when 'whatlinksto', 'whatlinkshere'
70
- r=self.API('action=query&list=backlinks&bllimit=5000&bltitle='+firstE)
71
- list=r['query']['backlinks'].map{|v| v['title']}
72
-
73
- when 'whattranscludes', 'whatembeds'
74
- r=self.API('action=query&list=embeddedin&eilimit=5000&eititle='+firstE)
75
- list=r['query']['embeddedin'].map{|v| v['title']}
76
-
77
- when 'image', 'imageusage'
78
- r=self.API('action=query&list=imageusage&iulimit=5000&iutitle='+firstE)
79
- list=r['query']['imageusage'].map{|v| v['title']}
80
-
81
- when 'search'
82
- r=self.API('action=query&list=search&srwhat=text&srlimit=5000&srnamespace='+(parameters[1]=='allns' ? CGI.escape('0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|100|101|102|103') : '0')+'&srsearch='+firstE)
83
- list=r['query']['search'].map{|v| v['title']}
84
-
85
- when 'searchtitles'
86
- r=self.API('action=query&list=search&srwhat=title&srlimit=5000&srnamespace='+(parameters[1]=='allns' ? CGI.escape('0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|100|101|102|103') : '0')+'&srsearch='+firstE)
87
- list=r['query']['search'].map{|v| v['title']}
88
-
89
- when 'random'
90
- r=self.API('action=query&list=random&rnnamespace=0&rnlimit='+firstE)
91
- list=r['query']['random'].map{|v| v['title']}
92
-
93
- when 'external', 'linksearch'
94
- r=self.API('action=query&euprop=title&list=exturlusage&eulimit=5000&euquery='+firstE)
95
- list=r['query']['exturlusage'].map{|v| v['title']}
96
-
97
- when 'google'
98
- limit=[parameters[1].to_i,999].min
99
- from=0
100
- list=[]
101
-
102
- while from<limit
103
- p=HTTP.get(URI.parse("http://www.google.pl/custom?q=kot&start=#{from}&sitesearch=#{@wikiURL}"))
104
- p.scan(/<div class=g><h2 class=r><a href="http:\/\/#{@wikiURL}\/wiki\/([^#<>\[\]\|\{\}]+?)" class=l>/){
105
- list<<CGI.unescape($1).gsub('_',' ')
106
- }
107
-
108
- from+=10
109
- end
110
-
111
- when 'grep', 'regex', 'regexp'
112
- split=@wikiURL.split('.')
113
- ns=(parameters[1] ? parameters[1].to_s.gsub(/\D/,'') : '0')
114
- redirs=(parameters[2] ? '&redirects=on' : '')
115
- list=[]
116
-
117
- p=HTTP.get(URI.parse("http://toolserver.org/~nikola/grep.php?pattern=#{firstE}&lang=#{split[0]}&wiki=#{split[1]}&ns=#{ns}#{redirs}"))
118
- p.scan(/<tr><td><a href="http:\/\/#{@wikiURL}\/wiki\/([^#<>\[\]\|\{\}]+?)(?:\?redirect=no|)">/){
119
- list<<CGI.unescape($1).gsub('_',' ')
120
- }
121
- end
122
-
123
- return list
124
- end
125
- end
126
-
127
- if $0==__FILE__
128
- puts 'What kind of list do you want to create?'
129
- if !(t=ARGV.shift)
130
- t=gets
131
- else
132
- t=t.strip
133
- puts t
134
- end
135
- puts ''
136
-
137
- puts 'Supply arguments to pass to listmaker:'
138
- puts '(press [Enter] without writing anything to finish)'
139
- arg=[]
140
- ARGV.each do |i|
141
- arg<<i.strip
142
- puts i.strip
143
- end
144
- while (a=gets.strip)!=''
145
- arg<<a
146
- end
147
-
148
- puts 'Making list, wait patiently...'
149
-
150
- s=Sunflower.new
151
- s.login
152
-
153
- l=s.make_list(t, *arg)
154
- l.sort!
155
- f=File.open('list.txt','w')
156
- f.write(l.join("\n"))
157
- f.close
158
-
159
- puts 'Done! List saved to "list.txt".'
160
- end
data/scripts/ZDBOT.rb DELETED
@@ -1,62 +0,0 @@
1
- require 'sunflower-core.rb'
2
- require 'sunflower-commontasks.rb'
3
- s=Sunflower.new
4
- s.login
5
-
6
- $summary='archiwizacja zadań'
7
-
8
- pp=Page.get('Wikipedia:Zadania dla botów')
9
- tasks=pp.text
10
-
11
- tasksDone=[]
12
- tasksError=[]
13
- tasksOld=[]
14
-
15
- tasks=tasks.gsub(/\n==\s*(.+?)\s*==\s*\{\{\/Status\|([^}]+)\}\}([\s\S]+?)(?=\r?\n==|\s*\Z)/) do
16
- title=$1.strip
17
- status=$2.strip
18
- text=$3.strip
19
-
20
- bval=''
21
-
22
- if (['wykonane','zrobione','błąd','błędne','stare'].index(status)==nil)
23
- bval=$&
24
- elsif (status=='wykonane' || status=='zrobione')
25
- tasksDone<<"== "+title+" ==\n{{/Status|"+status+"}}\n"+text
26
- elsif (status=='błąd' || status=='błędne')
27
- tasksError<<"== "+title+" ==\n{{/Status|"+status+"}}\n"+text
28
- elsif (status=='stare')
29
- tasksOld<<"== "+title+" ==\n{{/Status|"+status+"}}\n"+text
30
- end
31
-
32
- bval
33
- end
34
-
35
- puts 'Data loaded. Saving...'
36
-
37
- p=Page.get('Wikipedia:Zadania_dla_botów/Archiwum/błędne')
38
- p.append tasksError.join("\n\n") unless tasksError.empty?
39
- p.save unless tasksError.empty?
40
- puts 'Error - saved.'
41
-
42
- p=Page.get('Wikipedia:Zadania_dla_botów/Archiwum/wykonane')
43
- p.append tasksDone.join("\n\n") unless tasksDone.empty?
44
- p.save unless tasksDone.empty?
45
- puts 'Done - saved.'
46
-
47
- p=Page.get('Wikipedia:Zadania_dla_botów/Archiwum/stare')
48
- p.append tasksOld.join("\n\n") unless tasksOld.empty?
49
- p.save unless tasksOld.empty?
50
- puts 'Old - saved.'
51
-
52
- pp.text=tasks
53
- pp.save
54
- puts 'Main - saved.'
55
-
56
- # File.open('ZDBOT_main.txt','w').write(tasks)
57
- # File.open('ZDBOT_done.txt','w').write(tasksDone.join("\n\n")) unless tasksDone.empty?
58
- # File.open('ZDBOT_error.txt','w').write(tasksError.join("\n\n")) unless tasksError.empty?
59
- # File.open('ZDBOT_old.txt','w').write(tasksOld.join("\n\n")) unless tasksOld.empty?
60
-
61
- puts "Stats: done: #{tasksDone.length}; error: #{tasksError.length}; old: #{tasksOld.length}"
62
- gets