RubyGems - sunflower - Versions diffs - 0.4.5 → 0.5 - Mend

sunflower 0.4.5 → 0.5

Files changed (17) hide show

data/README +1 -1
data/example-bot.rb +7 -8
data/lib/sunflower/commontasks.rb +3 -77
data/lib/sunflower/core.rb +257 -78
data/lib/sunflower/list.rb +255 -0
data/lib/sunflower.rb +1 -1
metadata +3 -12
data/lib/sunflower/listmaker.rb +0 -160
data/scripts/ZDBOT.rb +0 -62
data/scripts/aktualizacjapilkarzy.rb +0 -339
data/scripts/author-list.rb +0 -36
data/scripts/changeimage.rb +0 -42
data/scripts/fix-multiple-same-refs.rb +0 -102
data/scripts/insight.rb +0 -133
data/scripts/make-id2team-list.rb +0 -32
data/scripts/wanted.rb +0 -72
data/use-easy-bot.rb +0 -54

data/lib/sunflower/list.rb ADDED Viewed

@@ -0,0 +1,255 @@
+# coding: utf-8
+# Class representing a list of articles. Inherits from Array.
+#
+# The elements are whatever the chosen list generator (one of the private
+# list_* methods below) returns; the API-backed generators return page titles.
+class Sunflower::List < Array
+	# Create a new article list and fill it with items.
+	#
+	# Sunflower may be nil; this will, however, make most methods unavailable.
+	#
+	# This is in fact a wrapper for various list generator methods,
+	# each private, named with the format of "list_<type>",
+	# which accept the key and opts arguments and return arrays.
+	# You can use this behavior to create your own ones.
+	#
+	# Raises Sunflower::Error if there is no generator for the given type.
+	#
+	# You should probably use Sunflower#make_list instead of calling this directly.
+	def initialize sunflower, type, key, opts={}
+		@sunflower = sunflower
+		meth = :"list_#{type}"
+		# the second argument makes respond_to? consider private methods as well
+		unless self.respond_to? meth, true
+			raise Sunflower::Error, "no such list type available: #{type}"
+		end
+		super(self.send(meth, key, opts))
+	end
+	# Construct new list from an array.
+	def self.from_ary ary, sunflower=nil
+		Sunflower::List.new sunflower, 'pages', ary
+	end
+	# Converts self to an array of Sunflower::Page objects.
+	#
+	# Use #pages_preloaded to preload the text of all pages at once, instead of via separate requests.
+	def pages
+		self.map{|t| Sunflower::Page.new t, @sunflower }
+	end
+	# Converts self to an array of Sunflower::Page objects,
+	# then preloads the text in all of them using as few requests as possible.
+	# (API limit is at most 500 pages/req for bots, 50 for other users.)
+	#
+	# Requires a non-nil Sunflower (it is used to query the API).
+	#
+	# If any title is invalid, Sunflower::Error will be raised.
+	#
+	# Results are matched to pages by title. A page whose title differs from
+	# the one reported by the API is left untouched; its text will be
+	# lazy-loaded when requested, as usual.
+	def pages_preloaded
+		pgs = self.pages
+		at_once = @sunflower.is_bot? ? 500 : 50
+		# keyed by page.title, which is different from self; page titles are guaranteed to be canonicalized.
+		# Grouping also means a title occurring several times is requested only once.
+		by_title = pgs.group_by{|pg| pg.title }
+		by_title.keys.each_slice(at_once) do |slice|
+			res = @sunflower.API('action=query&prop=revisions&rvprop=content&titles='+CGI.escape(slice.join '|'))
+			# 'pages' is a hash and its order need not follow the order of the request,
+			# so look every result up by title instead of relying on its position
+			res['query']['pages'].values.each do |h|
+				(by_title[h['title']] || []).each do |page|
+					if h['missing']
+						page.text = ''
+					elsif h['invalid']
+						raise Sunflower::Error, 'title invalid: '+page.title
+					else
+						page.text = h['revisions'][0]['*']
+					end
+					page.preloaded_text = true
+				end
+			end
+		end
+		return pgs
+	end
+private
+	# Can be used to create a new list from array. Used internally in .from_ary.
+	def list_pages ary, opts={} # :doc:
+		ary
+	end
+	# Create from plaintext list, each title in separate line.
+	def list_plaintext text, opts={} # :doc:
+		text.split(/\r?\n/)
+	end
+	# Create from file. Supports BOM in UTF-8 files.
+	# Trailing empty lines are dropped; an empty file gives an empty list.
+	def list_file filename, opts={} # :doc:
+		lines = File.readlines(filename)
+		# an empty file has no first line to strip the BOM from
+		lines.first.sub!(/\A\357\273\277/, '') unless lines.empty? # BOM
+		lines.each{|ln| ln.chomp! }
+		lines.pop while lines.last == ''
+		lines
+	end
+	# Categories on given page. Empty if the API reports none.
+	def list_categories_on page, opts={} # :doc:
+		r = @sunflower.API_continued('action=query&prop=categories&cllimit=max&titles='+CGI.escape(page), 'pages', 'clcontinue')
+		# the 'categories' key is absent when there is nothing to report
+		(r['query']['pages'].values.first['categories'] || []).map{|v| v['title']}
+	end
+	# Category members.
+	def list_category cat, opts={} # :doc:
+		r = @sunflower.API_continued('action=query&list=categorymembers&cmprop=title&cmlimit=max&cmtitle='+CGI.escape(cat), 'categorymembers', 'cmcontinue')
+		r['query']['categorymembers'].map{|v| v['title']}
+	end
+	# Category members. Scans categories recursively.
+	#
+	# The result contains every member found, subcategory titles included, without duplicates.
+	def list_category_recursive cat, opts={} # :doc:
+		list = [] # list of articles
+		processed = []
+		cats_to_process = [cat] # list of categories to be processed
+		# built once here rather than once per title inside the loop
+		cat_regex = /\A#{@sunflower.ns_regex_for 'category'}:/
+		until cats_to_process.empty?
+			now = cats_to_process.shift
+			processed << now # make sure we do not get stuck in infinite loop
+			list2 = list_category now # get contents of first cat in list
+			# find categories and queue them, skipping ones already seen or already queued
+			subcats = list2.select{|el| el =~ cat_regex }
+			cats_to_process += subcats.reject{|el| processed.include?(el) || cats_to_process.include?(el) }
+			list += list2 # add articles to main list
+		end
+		list.uniq!
+		return list
+	end
+	# Links on given page. Empty if the API reports none.
+	def list_links_on page, opts={} # :doc:
+		r = @sunflower.API_continued('action=query&prop=links&pllimit=max&titles='+CGI.escape(page), 'pages', 'plcontinue')
+		# the 'links' key is absent when there is nothing to report
+		(r['query']['pages'].values.first['links'] || []).map{|v| v['title']}
+	end
+	# Templates used on given page. Empty if the API reports none.
+	def list_templates_on page, opts={} # :doc:
+		r = @sunflower.API_continued('action=query&prop=templates&tllimit=max&titles='+CGI.escape(page), 'pages', 'tlcontinue')
+		# the 'templates' key is absent when there is nothing to report
+		(r['query']['pages'].values.first['templates'] || []).map{|v| v['title']}
+	end
+	# Pages edited by given user.
+	def list_contribs user, opts={} # :doc:
+		r = @sunflower.API_continued('action=query&list=usercontribs&uclimit=max&ucprop=title&ucuser='+CGI.escape(user), 'usercontribs', 'uccontinue')
+		r['query']['usercontribs'].map{|v| v['title']}
+	end
+	# Pages which link to given page.
+	def list_whatlinkshere page, opts={} # :doc:
+		r = @sunflower.API_continued('action=query&list=backlinks&bllimit=max&bltitle='+CGI.escape(page), 'backlinks', 'blcontinue')
+		r['query']['backlinks'].map{|v| v['title']}
+	end
+	# Pages which embed (transclude) given page.
+	def list_whatembeds page, opts={} # :doc:
+		r = @sunflower.API_continued('action=query&list=embeddedin&eilimit=max&eititle='+CGI.escape(page), 'embeddedin', 'eicontinue')
+		r['query']['embeddedin'].map{|v| v['title']}
+	end
+	# Pages which use given image.
+	def list_image_usage image, opts={} # :doc:
+		r = @sunflower.API_continued('action=query&list=imageusage&iulimit=max&iutitle='+CGI.escape(image), 'imageusage', 'iucontinue')
+		r['query']['imageusage'].map{|v| v['title']}
+	end
+	# Search results for given text.
+	#
+	# Options:
+	# * ns: namespaces to search in, as pipe-separated numbers (or single number). Default: 0 (main).
+	def list_search text, opts={} # :doc:
+		opts = {ns: 0}.merge opts
+		r = @sunflower.API_continued('action=query&list=search&srwhat=text&srlimit=max&srnamespace='+CGI.escape(opts[:ns].to_s)+'&srsearch='+CGI.escape(text), 'search', 'srcontinue')
+		r['query']['search'].map{|v| v['title']}
+	end
+	# Search results for given text. Only searches in page titles. See also #list_grep.
+	#
+	# Options:
+	# * ns: namespaces to search in, as pipe-separated numbers (or single number). Default: 0 (main).
+	def list_search_titles key, opts={} # :doc:
+		opts = {ns: 0}.merge opts
+		r = @sunflower.API_continued('action=query&list=search&srwhat=title&srlimit=max&srnamespace='+CGI.escape(opts[:ns].to_s)+'&srsearch='+CGI.escape(key), 'search', 'srcontinue')
+		r['query']['search'].map{|v| v['title']}
+	end
+	# `count` random pages.
+	def list_random count, opts={} # :doc:
+		r = @sunflower.API_continued('action=query&list=random&rnnamespace=0&rnlimit='+CGI.escape(count.to_s), 'random', 'rncontinue')
+		r['query']['random'].map{|v| v['title']}
+	end
+	# External link search. Format like on Special:LinkSearch.
+	def list_linksearch url, opts={} # :doc:
+		r = @sunflower.API_continued('action=query&list=exturlusage&eulimit=max&euprop=title&euquery='+CGI.escape(url), 'exturlusage', 'eucontinue')
+		r['query']['exturlusage'].map{|v| v['title']}
+	end
+	# Pages whose titles match given regex. Uses nikola's grep tool: http://toolserver.org/~nikola/grep.php
+	#
+	# Options:
+	# * ns: namespace to search in, as a number (default: 0, main)
+	# * redirs: whether to include redirects in search results (default: true)
+	def list_grep regex, opts={} # :doc:
+		opts = {ns: 0, redirs: true}.merge opts
+		lang, wiki = @sunflower.wikiURL.split '.', 2
+		list = []
+		html = RestClient.get("http://toolserver.org/~nikola/grep.php?pattern=#{CGI.escape regex}&lang=#{CGI.escape lang}&wiki=#{CGI.escape wiki}&ns=#{CGI.escape opts[:ns].to_s}#{opts[:redirs] ? '&redirects=on' : ''}")
+		# the host name is escaped so that its dots only match literal dots
+		html.scan(/<tr><td><a href="http:\/\/#{Regexp.escape @sunflower.wikiURL}\/wiki\/([^#<>\[\]\|\{\}]+?)(?:\?redirect=no|)">/){
+			list << @sunflower.cleanup_title($1)
+		}
+		return list
+	end
+end
+class Sunflower
+	# Makes a list of articles. Returns array of titles.
+	def make_list type, key, opts={}
+		begin
+			return Sunflower::List.new self, type, key, opts
+		rescue Sunflower::Error => e
+			if e.message == "no such list type available: #{type}"
+				backwards_compat = {
+					:categorieson => :categories_on,
+					:categoryrecursive => :category_recursive,
+					:categoryr => :category_recursive,
+					:linkson => :links_on,
+					:templateson => :templates_on,
+					:transclusionson => :templates_on,
+					:usercontribs => :contribs,
+					:whatlinksto => :whatlinkshere,
+					:whattranscludes => :whatembeds,
+					:imageusage => :image_usage,
+					:image => :image_usage,
+					:searchtitles => :search_titles,
+					:external => :linksearch,
+					:regex => :grep,
+					:regexp => :grep,
+				}
+				if type2 = backwards_compat[type.to_s.downcase.gsub(/[^a-z]/, '').to_sym]
+					warn "warning: #{type} has been renamed to #{type2}, old name will be removed in v0.6"
+					Sunflower::List.new self, type2, key, opts
+				else
+					raise e
+				end
+			else
+				raise e
+			end
+		end
+	end
+end

data/lib/sunflower.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 # coding: utf-8
 require 'sunflower/core'
 require 'sunflower/commontasks'
-require 'sunflower/listmaker'
+require 'sunflower/list'

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sunflower
 version: !ruby/object:Gem::Version
-  version: 0.4.5
+  version: '0.5'
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-08-05 00:00:00.000000000 Z
+date: 2012-08-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: json
@@ -55,26 +55,17 @@ files:
 - LICENSE
 - bin/sunflower-setup
 - example-bot.rb
-- use-easy-bot.rb
 - lib/sunflower.rb
 - lib/sunflower/core.rb
 - lib/sunflower/commontasks.rb
-- lib/sunflower/listmaker.rb
+- lib/sunflower/list.rb
 - scripts/fix-bold-in-headers.rb
-- scripts/fix-multiple-same-refs.rb
 - scripts/fix-langs.rb
 - scripts/lekkoatl-portal.rb
-- scripts/ZDBOT.rb
-- scripts/aktualizacjapilkarzy.rb
-- scripts/changeimage.rb
-- scripts/insight.rb
-- scripts/make-id2team-list.rb
-- scripts/author-list.rb
 - scripts/fix-unicode-control-chars.rb
 - scripts/fix-double-pipes.rb
 - scripts/fix-some-entities.rb
 - scripts/recat.rb
-- scripts/wanted.rb
 homepage: http://github.com/MatmaRex/Sunflower
 licenses: []
 post_install_message:

data/lib/sunflower/listmaker.rb DELETED Viewed

@@ -1,160 +0,0 @@
-# coding: utf-8
-class Sunflower
-	# Makes a list of articles. Returns array of titles.
-	def make_list type, *parameters
-		type=type.downcase.gsub(/[^a-z]/, '')
-		first=parameters[0]
-		firstE=CGI.escape first.to_s
-		case type
-		when 'file'
-			f=File.open first
-			list=f.read.sub(/\357\273\277/,'').strip.split(/\r?\n/)
-			f.close
-		when 'page', 'pages'
-			list=parameters
-		when 'input'
-			puts 'Insert titles of articles to edit:'
-			puts 'Press [Enter] without inputting any text to finish.'
-			puts 'Press [Ctrl]+[C] to kill bot.'
-			list=[]
-			while true
-				input=gets.strip
-				break if input==''
-				list<<input
-			end
-		when 'categorieson'
-			r=self.API('action=query&prop=categories&cllimit=500&titles='+firstE)
-			list=r['query']['pages'].first['categories'].map{|v| v['title']}
-		when 'category'
-			r=self.API('action=query&list=categorymembers&cmprop=title&cmlimit=5000&cmtitle='+firstE)
-			list=r['query']['categorymembers'].map{|v| v['title']}
-		when 'categoryr', 'categoryrecursive'
-			list = [] # list of articles
-			processed = []
-			cats_to_process = [first] # list of categories to be processes
-			while !cats_to_process.empty?
-				now = cats_to_process.shift
-				processed << now # make sure we do not get stuck in infinite loop
-				list2 = self.make_list 'category', now # get contents of first cat in list
-				 # find categories and queue them
-				cats_to_process += list2
-					.select{|el| el=~/\AKategoria:/}
-					.reject{|el| processed.include? el or cats_to_process.include? el}
-				list += list2 # add articles to main list
-			end
-			list.uniq!
-		when 'linkson'
-			r=self.API('action=query&prop=links&pllimit=5000&titles='+firstE)
-			list=r['query']['pages'].first['links'].map{|v| v['title']}
-		when 'transclusionson', 'templateson'
-			r=self.API('action=query&prop=templates&tllimit=5000&titles='+firstE)
-			list=r['query']['pages'].first['templates'].map{|v| v['title']}
-		when 'usercontribs', 'contribs'
-			r=self.API('action=query&list=usercontribs&uclimit=5000&ucprop=title&ucuser='+firstE)
-			list=r['query']['usercontribs'].map{|v| v['title']}
-		when 'whatlinksto', 'whatlinkshere'
-			r=self.API('action=query&list=backlinks&bllimit=5000&bltitle='+firstE)
-			list=r['query']['backlinks'].map{|v| v['title']}
-		when 'whattranscludes', 'whatembeds'
-			r=self.API('action=query&list=embeddedin&eilimit=5000&eititle='+firstE)
-			list=r['query']['embeddedin'].map{|v| v['title']}
-		when 'image', 'imageusage'
-			r=self.API('action=query&list=imageusage&iulimit=5000&iutitle='+firstE)
-			list=r['query']['imageusage'].map{|v| v['title']}
-		when 'search'
-			r=self.API('action=query&list=search&srwhat=text&srlimit=5000&srnamespace='+(parameters[1]=='allns' ? CGI.escape('0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|100|101|102|103') : '0')+'&srsearch='+firstE)
-			list=r['query']['search'].map{|v| v['title']}
-		when 'searchtitles'
-			r=self.API('action=query&list=search&srwhat=title&srlimit=5000&srnamespace='+(parameters[1]=='allns' ? CGI.escape('0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|100|101|102|103') : '0')+'&srsearch='+firstE)
-			list=r['query']['search'].map{|v| v['title']}
-		when 'random'
-			r=self.API('action=query&list=random&rnnamespace=0&rnlimit='+firstE)
-			list=r['query']['random'].map{|v| v['title']}
-		when 'external', 'linksearch'
-			r=self.API('action=query&euprop=title&list=exturlusage&eulimit=5000&euquery='+firstE)
-			list=r['query']['exturlusage'].map{|v| v['title']}
-		when 'google'
-			limit=[parameters[1].to_i,999].min
-			from=0
-			list=[]
-			while from<limit
-				p=HTTP.get(URI.parse("http://www.google.pl/custom?q=kot&start=#{from}&sitesearch=#{@wikiURL}"))
-				p.scan(/<div class=g><h2 class=r><a href="http:\/\/#{@wikiURL}\/wiki\/([^#<>\[\]\|\{\}]+?)" class=l>/){
-					list<<CGI.unescape($1).gsub('_',' ')
-				}
-				from+=10
-			end
-		when 'grep', 'regex', 'regexp'
-			split=@wikiURL.split('.')
-			ns=(parameters[1] ? parameters[1].to_s.gsub(/\D/,'') : '0')
-			redirs=(parameters[2] ? '&redirects=on' : '')
-			list=[]
-			p=HTTP.get(URI.parse("http://toolserver.org/~nikola/grep.php?pattern=#{firstE}&lang=#{split[0]}&wiki=#{split[1]}&ns=#{ns}#{redirs}"))
-			p.scan(/<tr><td><a href="http:\/\/#{@wikiURL}\/wiki\/([^#<>\[\]\|\{\}]+?)(?:\?redirect=no|)">/){
-				list<<CGI.unescape($1).gsub('_',' ')
-			}
-		end
-		return list
-	end
-end
-if $0==__FILE__
-	puts 'What kind of list do you want to create?'
-	if !(t=ARGV.shift)
-		t=gets
-	else
-		t=t.strip
-		puts t
-	end
-	puts ''
-	puts 'Supply arguments to pass to listmaker:'
-	puts '(press [Enter] without writing anything to finish)'
-	arg=[]
-	ARGV.each do |i|
-		arg<<i.strip
-		puts i.strip
-	end
-	while (a=gets.strip)!=''
-		arg<<a
-	end
-	puts 'Making list, wait patiently...'
-	s=Sunflower.new
-	s.login
-	l=s.make_list(t, *arg)
-	l.sort!
-	f=File.open('list.txt','w')
-	f.write(l.join("\n"))
-	f.close
-	puts 'Done! List saved to "list.txt".'
-end

data/scripts/ZDBOT.rb DELETED Viewed

@@ -1,62 +0,0 @@
-require 'sunflower-core.rb'
-require 'sunflower-commontasks.rb'
-s=Sunflower.new
-s.login
-$summary='archiwizacja zadań'
-pp=Page.get('Wikipedia:Zadania dla botów')
-tasks=pp.text
-tasksDone=[]
-tasksError=[]
-tasksOld=[]
-tasks=tasks.gsub(/\n==\s*(.+?)\s*==\s*\{\{\/Status\|([^}]+)\}\}([\s\S]+?)(?=\r?\n==|\s*\Z)/) do
-	title=$1.strip
-	status=$2.strip
-	text=$3.strip
-	bval=''
-	if (['wykonane','zrobione','błąd','błędne','stare'].index(status)==nil)
-		bval=$&
-	elsif (status=='wykonane' || status=='zrobione')
-		tasksDone<<"== "+title+" ==\n{{/Status|"+status+"}}\n"+text
-	elsif (status=='błąd' || status=='błędne')
-		tasksError<<"== "+title+" ==\n{{/Status|"+status+"}}\n"+text
-	elsif (status=='stare')
-		tasksOld<<"== "+title+" ==\n{{/Status|"+status+"}}\n"+text
-	end
-	bval
-end
-puts 'Data loaded. Saving...'
-p=Page.get('Wikipedia:Zadania_dla_botów/Archiwum/błędne')
-p.append tasksError.join("\n\n") unless tasksError.empty?
-p.save unless tasksError.empty?
-puts 'Error - saved.'
-p=Page.get('Wikipedia:Zadania_dla_botów/Archiwum/wykonane')
-p.append tasksDone.join("\n\n") unless tasksDone.empty?
-p.save unless tasksDone.empty?
-puts 'Done - saved.'
-p=Page.get('Wikipedia:Zadania_dla_botów/Archiwum/stare')
-p.append tasksOld.join("\n\n") unless tasksOld.empty?
-p.save unless tasksOld.empty?
-puts 'Old - saved.'
-pp.text=tasks
-pp.save
-puts 'Main - saved.'
-# File.open('ZDBOT_main.txt','w').write(tasks)
-# File.open('ZDBOT_done.txt','w').write(tasksDone.join("\n\n")) unless tasksDone.empty?
-# File.open('ZDBOT_error.txt','w').write(tasksError.join("\n\n")) unless tasksError.empty?
-# File.open('ZDBOT_old.txt','w').write(tasksOld.join("\n\n")) unless tasksOld.empty?
-puts "Stats: done: #{tasksDone.length}; error: #{tasksError.length}; old: #{tasksOld.length}"
-gets