sunflower 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +4 -0
- data/README +21 -0
- data/bin/sunflower-setup +68 -0
- data/example-bot.rb +12 -0
- data/lib/sunflower/commontasks.rb +251 -0
- data/lib/sunflower/core.rb +253 -0
- data/lib/sunflower/listmaker.rb +152 -0
- data/lib/sunflower.rb +4 -0
- data/scripts/ZDBOT.rb +62 -0
- data/scripts/aktualizacjapilkarzy.rb +339 -0
- data/scripts/author-list.rb +36 -0
- data/scripts/changeimage.rb +42 -0
- data/scripts/fix-bold-in-headers.rb +53 -0
- data/scripts/fix-double-pipes.rb +49 -0
- data/scripts/fix-langs.rb +43 -0
- data/scripts/fix-multiple-same-refs.rb +102 -0
- data/scripts/fix-some-entities.rb +43 -0
- data/scripts/fix-unicode-control-chars.rb +51 -0
- data/scripts/insight.rb +133 -0
- data/scripts/lekkoatl-portal.rb +51 -0
- data/scripts/make-id2team-list.rb +32 -0
- data/scripts/recat.rb +32 -0
- data/scripts/wanted.rb +72 -0
- data/use-easy-bot.rb +54 -0
- metadata +115 -0
@@ -0,0 +1,152 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
class Sunflower
  # Makes a list of articles. Returns array of titles.
  #
  # type       - kind of list to build. Case is ignored and every character
  #              other than a-z is dropped, so 'category-r' equals 'categoryr'.
  #              Handled kinds: file, page(s), input, categorieson, category,
  #              categoryr(ecursive), linkson, transclusionson/templateson,
  #              usercontribs/contribs, whatlinksto/whatlinkshere,
  #              whattranscludes/whatembeds, image/imageusage, search,
  #              searchtitles, random, external/linksearch, google,
  #              grep/regex/regexp.
  # parameters - arguments for the chosen kind. The first one is the file
  #              name, page title, user name, search phrase or (for 'random')
  #              the number of pages; 'page(s)' returns all of them as-is.
  #
  # Returns nil when type is none of the kinds above.
  def make_list type, *parameters
    type=type.downcase.gsub(/[^a-z]/, '')
    first=parameters[0]
    # Kinds such as 'input' are called without arguments; CGI.escape(nil)
    # would raise before the case statement is reached.
    firstE=CGI.escape first.to_s

    case type
    when 'file'
      # Block form closes the file even if reading raises.
      content=File.open(first){|f| f.read}
      list=content.sub(/\357\273\277/,'').strip.split(/\r?\n/) # strip UTF-8 BOM

    when 'page', 'pages'
      list=parameters

    when 'input'
      puts 'Insert titles of articles to edit:'
      puts 'Press [Enter] without inputting any text to finish.'
      puts 'Press [Ctrl]+[C] to kill bot.'
      list=[]
      # $stdin is read explicitly: Kernel#gets would read from files named in
      # ARGV. End of input (nil) finishes the list just like an empty line.
      while (input=$stdin.gets)
        input=input.strip
        break if input==''

        list<<input
      end

    when 'categorieson'
      r=self.API('action=query&prop=categories&cllimit=500&titles='+firstE)
      list=listmaker_page_titles(r, 'categories')

    when 'category'
      r=self.API('action=query&list=categorymembers&cmprop=title&cmlimit=5000&cmtitle='+firstE)
      list=listmaker_titles(r['query']['categorymembers'])

    when 'categoryr', 'categoryrecursive'
      list=[] # list of articles
      queue=[first] # categories still to be processed
      seen={first=>true} # categories already queued; guards against category cycles
      until queue.empty?
        members=make_list('category', queue.shift)
        members.each do |title|
          next unless title=~/\AKategoria:/ # subcategory prefix is pl.wiki specific
          next if seen[title]
          seen[title]=true
          queue<<title
        end
        list.concat members
      end
      list.uniq! # remove dupes

    when 'linkson'
      r=self.API('action=query&prop=links&pllimit=5000&titles='+firstE)
      list=listmaker_page_titles(r, 'links')

    when 'transclusionson', 'templateson'
      r=self.API('action=query&prop=templates&tllimit=5000&titles='+firstE)
      list=listmaker_page_titles(r, 'templates')

    when 'usercontribs', 'contribs'
      r=self.API('action=query&list=usercontribs&uclimit=5000&ucprop=title&ucuser='+firstE)
      list=listmaker_titles(r['query']['usercontribs'])

    when 'whatlinksto', 'whatlinkshere'
      r=self.API('action=query&list=backlinks&bllimit=5000&bltitle='+firstE)
      list=listmaker_titles(r['query']['backlinks'])

    when 'whattranscludes', 'whatembeds'
      r=self.API('action=query&list=embeddedin&eilimit=5000&eititle='+firstE)
      list=listmaker_titles(r['query']['embeddedin'])

    when 'image', 'imageusage'
      r=self.API('action=query&list=imageusage&iulimit=5000&iutitle='+firstE)
      list=listmaker_titles(r['query']['imageusage'])

    when 'search'
      list=listmaker_search('text', firstE, parameters[1])

    when 'searchtitles'
      list=listmaker_search('title', firstE, parameters[1])

    when 'random'
      # Only the digits of the argument are used as the number of pages.
      r=self.API('action=query&list=random&rnnamespace=0&rnlimit='+first.to_s.gsub(/\D/, ''))
      list=listmaker_titles(r['query']['random'])

    when 'external', 'linksearch'
      r=self.API('action=query&euprop=title&list=exturlusage&eulimit=5000&euquery='+firstE)
      list=listmaker_titles(r['query']['exturlusage'])

    when 'google'
      limit=[parameters[1].to_i,999].min
      from=0
      list=[]
      host=Regexp.escape(@wikiURL.to_s) # dots in the host name must match literally

      while from<limit
        p=HTTP.get(URI.parse("http://www.google.pl/custom?q=#{firstE}&start=#{from}&sitesearch=#{@wikiURL}"))
        p.scan(/<div class=g><h2 class=r><a href="http:\/\/#{host}\/wiki\/([^#<>\[\]\|\{\}]+?)" class=l>/){
          list<<CGI.unescape($1).gsub('_',' ')
        }

        from+=10
      end

    when 'grep', 'regex', 'regexp'
      split=@wikiURL.split('.')
      ns=(parameters[1] ? parameters[1].to_s.gsub(/\D/,'') : '0')
      redirs=(parameters[2] ? '&redirects=on' : '')
      list=[]
      host=Regexp.escape(@wikiURL)

      p=HTTP.get(URI.parse("http://toolserver.org/~nikola/grep.php?pattern=#{firstE}&lang=#{split[0]}&wiki=#{split[1]}&ns=#{ns}#{redirs}"))
      p.scan(/<tr><td><a href="http:\/\/#{host}\/wiki\/([^#<>\[\]\|\{\}]+?)(?:\?redirect=no|)">/){
        list<<CGI.unescape($1).gsub('_',' ')
      }
    end

    return list
  end

  private

  # Returns the 'title' of every item; a missing (nil) item list gives [].
  def listmaker_titles items
    (items||[]).map{|v| v['title']}
  end

  # Returns the titles stored under +key+ of the first page in a prop= query
  # result. 'pages' is presumably a Hash keyed by page id (other scripts in
  # this package read it with .values[0]); an Array is accepted as well.
  def listmaker_page_titles r, key
    pages=r['query']['pages']
    page=pages.respond_to?(:values) ? pages.values.first : pages.first
    listmaker_titles(page && page[key])
  end

  # Runs a list=search query. +what+ is the srwhat value ('text' or 'title'),
  # +phrase+ the already escaped search phrase; +scope+ == 'allns' widens the
  # search from namespace 0 to the fixed namespace list below.
  def listmaker_search what, phrase, scope
    ns=(scope=='allns' ? CGI.escape('0|1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|100|101|102|103') : '0')
    r=self.API('action=query&list=search&srwhat='+what+'&srlimit=5000&srnamespace='+ns+'&srsearch='+phrase)
    listmaker_titles(r['query']['search'])
  end
end
|
118
|
+
|
119
|
+
# Command-line front end: asks for a list kind and its arguments (taken from
# the command line first, then interactively), builds the list and writes
# it, sorted, to list.txt.
if $0==__FILE__
  puts 'What kind of list do you want to create?'
  if !(t=ARGV.shift)
    # $stdin is read explicitly; to_s covers end of input (nil).
    t=$stdin.gets.to_s.strip
  else
    t=t.strip
    puts t
  end
  puts ''

  puts 'Supply arguments to pass to listmaker:'
  puts '(press [Enter] without writing anything to finish)'
  arg=[]
  ARGV.each do |i|
    arg<<i.strip
    puts i.strip
  end
  # Kernel#gets treats whatever is left in ARGV as names of files to read,
  # so the consumed arguments are removed before anything else asks for input.
  ARGV.clear
  while (a=$stdin.gets.to_s.strip)!=''
    arg<<a
  end

  puts 'Making list, wait patiently...'

  s=Sunflower.new
  s.login

  l=s.make_list(t, *arg)
  # make_list returns nil for a list kind it does not know.
  abort "Unknown list type: #{t}" if l.nil?
  l.sort!
  # Block form closes the file even if writing raises.
  File.open('list.txt','w'){|f| f.write(l.join("\n"))}

  puts 'Done! List saved to "list.txt".'
end
|
data/lib/sunflower.rb
ADDED
data/scripts/ZDBOT.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'sunflower-core.rb'
|
2
|
+
require 'sunflower-commontasks.rb'
|
3
|
+
# Archives handled requests from the bot task page: every section whose
# {{/Status|...}} marks it as done, erroneous or old is cut out of the main
# page and appended to the matching archive subpage.
bot=Sunflower.new
bot.login

$summary='archiwizacja zadań'

requests=Page.get('Wikipedia:Zadania dla botów')

tasksDone=[]
tasksError=[]
tasksOld=[]

# A section is "== title ==", a status template and everything up to the
# next second-level heading (or the end of the page).
remaining=requests.text.gsub(/\n==\s*(.+?)\s*==\s*\{\{\/Status\|([^}]+)\}\}([\s\S]+?)(?=\r?\n==|\s*\Z)/) do
  whole=$&
  title=$1.strip
  status=$2.strip
  text=$3.strip

  entry="== "+title+" ==\n{{/Status|"+status+"}}\n"+text

  # Archived sections are replaced with nothing; any other status keeps the
  # section on the page unchanged.
  case status
  when 'wykonane', 'zrobione'
    tasksDone<<entry
    ''
  when 'błąd', 'błędne'
    tasksError<<entry
    ''
  when 'stare'
    tasksOld<<entry
    ''
  else
    whole
  end
end

puts 'Data loaded. Saving...'

# Each archive page is fetched regardless, but only modified and saved when
# there is something to add.
[
  ['Wikipedia:Zadania_dla_botów/Archiwum/błędne', tasksError, 'Error - saved.'],
  ['Wikipedia:Zadania_dla_botów/Archiwum/wykonane', tasksDone, 'Done - saved.'],
  ['Wikipedia:Zadania_dla_botów/Archiwum/stare', tasksOld, 'Old - saved.']
].each do |archive_title, entries, message|
  archive=Page.get(archive_title)
  unless entries.empty?
    archive.append entries.join("\n\n")
    archive.save
  end
  puts message
end

requests.text=remaining
requests.save
puts 'Main - saved.'

# Disabled debugging output: dump the results to local files instead.
# File.open('ZDBOT_main.txt','w').write(tasks)
# File.open('ZDBOT_done.txt','w').write(tasksDone.join("\n\n")) unless tasksDone.empty?
# File.open('ZDBOT_error.txt','w').write(tasksError.join("\n\n")) unless tasksError.empty?
# File.open('ZDBOT_old.txt','w').write(tasksOld.join("\n\n")) unless tasksOld.empty?

puts "Stats: done: #{tasksDone.length}; error: #{tasksError.length}; old: #{tasksOld.length}"
gets
|
@@ -0,0 +1,339 @@
|
|
1
|
+
require 'orderedhash'
|
2
|
+
require 'hpricot'
|
3
|
+
require 'net/http'
|
4
|
+
require 'sunflower-core.rb'
|
5
|
+
require 'sunflower-listmaker.rb'
|
6
|
+
include Net
|
7
|
+
|
8
|
+
# Log file that receives a copy of everything printed with puts; sync makes
# every write reach the file immediately.
$datafile=File.open('aktual.txt','w')
$datafile.sync=true

# id2team.txt holds fields separated by tabs or line breaks; consecutive
# fields are loaded as key/value pairs. Any error while loading (missing
# file, odd number of fields, ...) is ignored and leaves the table empty.
id2team={}
begin
  raw=File.open('id2team.txt'){|f| f.read}
  fields=raw.strip.split(/\r?\n|\t/)
  id2team.replace Hash[*fields]
rescue
end
|
18
|
+
|
19
|
+
# comes from http://rubyforge.org/frs/?group_id=6257&release_id=36721
|
20
|
+
module Levenshtein
  VERSION = "0.2.0"

  # Levenshtein distance scaled by the length of the longer sequence.
  #
  # Two empty sequences give 0.0. When threshold is given it is converted to
  # an absolute limit of (threshold * longer length + 1) and nil is returned
  # if distance gives up because of that limit.
  def self.normalized_distance(s1, s2, threshold=nil)
    shorter, longer = s1.length > s2.length ? [s2, s1] : [s1, s2]

    return 0.0 if longer.length == 0

    unless threshold
      return self.distance(shorter, longer).to_f/longer.length
    end

    raw = self.distance(shorter, longer, (threshold*longer.length+1).to_i)
    raw ? raw.to_f/longer.length : nil
  end

  # Levenshtein distance between two sequences (strings, arrays or anything
  # else that responds to :length and :[] with elements comparable by ==).
  #
  # With a threshold, nil is returned as soon as the distance is known to be
  # at least threshold.
  def self.distance(s1, s2, threshold=nil)
    shorter, longer = s1.length > s2.length ? [s2, s1] : [s1, s2]

    # Trivial cases first.
    return 0 if shorter == longer
    return longer.length if shorter.length == 0

    if threshold
      # The length difference alone is a lower bound of the distance.
      return nil if (longer.length-shorter.length) >= threshold

      # Elements present in one sequence but absent from the other give
      # another lower bound. Strings are split into characters; sequences
      # supporting set difference (:-) are used directly.
      left = right = nil
      if shorter.respond_to?(:scan) and longer.respond_to?(:scan)
        left, right = shorter.scan(/./), longer.scan(/./)
      elsif shorter.respond_to?(:-) and longer.respond_to?(:-)
        left, right = shorter, longer
      end

      if left and right
        return nil if (left-right).length >= threshold
        return nil if (right-left).length >= threshold
      end
    end

    distance_fast_or_slow(shorter, longer, threshold)
  end

  # Uses levenshtein_distance_fast when this module responds to it and the
  # pure Ruby implementation otherwise.
  def self.distance_fast_or_slow(s1, s2, threshold) # :nodoc:
    return levenshtein_distance_fast(s1, s2, threshold) if respond_to?(:levenshtein_distance_fast)

    levenshtein_distance_slow(s1, s2, threshold)
  end

  # Row-by-row dynamic programming; only the previous row is kept.
  def self.levenshtein_distance_slow(s1, s2, threshold) # :nodoc:
    current = (0..s1.length).to_a

    (1..s2.length).each do |y|
      previous = current
      current = [y]

      (1..s1.length).each do |x|
        cost = (s1[x-1]==s2[y-1] ? 0 : 1)
        current[x] = [previous[x]+1, current[x-1]+1, previous[x-1]+cost].min
      end

      # The minimum of a later row is never below the minimum of this one,
      # so the final result cannot get under the threshold any more.
      return nil if threshold and current.min >= threshold
    end

    current[-1]
  end
end
|
109
|
+
|
110
|
+
|
111
|
+
# Replacement for Kernel#puts used by this script: every argument is written
# both to standard output and to the $datafile log. Returns the argument list.
def puts *arg
  arg.each do |line|
    [$stdout, $datafile].each{|io| io.puts line}
  end
end
|
114
|
+
|
115
|
+
# Currently a no-op: the whole body is disabled with a =begin/=end block
# comment, so calling this method does nothing and returns nil.
#
# The disabled code would write the sizes and contents of the $notfound,
# $same and $diff lists to 'aktualdata.txt'. Note that its "# {...}" lines
# have a space after '#', so even if re-enabled they would be written
# literally instead of being interpolated.
def saveData
=begin
File.open('aktualdata.txt','w'){|f|
f.write "
$notfound=#{$notfound.length}
$same=#{$same.length}
$diff=#{$diff.length}
----
$notfound:
# {$notfound.join "\n"}
----
$same:
# {$same.join "\n"}
----
$diff:
# {$diff.join "\n"}
"
}
=end
end
|
135
|
+
|
136
|
+
# Fetches the given URL with an HTTP GET request and returns whatever
# HTTP.get returns for it.
def get(url)
  target=URI.parse(url)
  HTTP.get(target)
end
|
139
|
+
|
140
|
+
# Downloads a soccerbase player page and extracts the career table found
# between the "All time playing career" heading and the "games" anchor.
#
# url - address of the player's details page.
#
# Returns a Hash mapping the club name (text of the club link) to
# [matches, goals, teamid]:
#   matches - sum of the number before and the number after the "(" in the
#             fifth cell (presumably starts plus substitute appearances),
#   goals   - digits of the sixth cell,
#   teamid  - trailing digits of the club link's href.
# Returns {} when the page does not contain the expected markers.
def getPlayerData url
  page=get url

  # Without both markers there is no table to parse.
  _before, heading, rest=page.to_s.partition('<b>All time playing career</b>')
  return {} if heading.empty?
  table, anchor, _after=rest.partition('<a name=games></a>')
  return {} if anchor.empty?

  data={}
  # 'tr+tr' selects every row that follows another row, i.e. skips the first.
  Hpricot.parse(table.strip).search('tr+tr').each do |row|
    first_cell=row.at('td')
    # Skip rows without cells, spanning rows and the "no data" notice.
    next if first_cell.nil? || first_cell['colspan']
    next if row.inner_html=~/No appearance data available/

    cells=row.search 'td'
    link=cells[0].search('font a')[0]
    next unless link && cells[4] && cells[5]

    team=link.inner_html.strip
    teamid=link['href'].sub(/\A.+?(\d+)\Z/, '\1')
    # to_i on a missing part (no "(" in the cell) counts it as 0.
    counts=cells[4].at('font').inner_html.split('(').map{|m| m.gsub(/[^0-9]/,'').to_i}
    matches=counts[0].to_i+counts[1].to_i
    goals=cells[5].at('font').inner_html.gsub(/[^0-9]/,'').to_i

    data[team]=[matches,goals,teamid]
  end
  data
end
|
165
|
+
|
166
|
+
# Searches soccerbase for a player by name.
#
# Returns the http: address that the result page assigns to window.location,
# or nil when the response contains no such assignment.
def searchForPlayer text
  search_url="http://www.soccerbase.com/search.sd?search_string=#{CGI.escape text}&search_cat=players"
  response=get search_url
  found=(response=~/window.location = "(http:[^"]+)"/)

  found ? $1 : nil
end
|
172
|
+
|
173
|
+
# Main part of the script: for every article on the list, find the player's
# soccerbase page, read the per-club appearances and goals from it and write
# them into the "kluby" / "występy(gole)" fields of the article's infobox.

$edits=0
$summary='aktualizacja danych o meczach piłkarza'

puts 'Making list...'
s=Sunflower.new('pl.wikipedia.org')
s.login
enw=Sunflower.new('en.wikipedia.org')
enw.login

# list=(
# s.makeList('category-r', 'Kategoria:Piłkarze Aston Villa F.C.')+
# s.makeList('category-r', 'Kategoria:Piłkarze Chelsea F.C.')+
# s.makeList('category-r', 'Kategoria:Piłkarze Liverpool F.C.')
# ).uniq
# list=(
# s.makeList('category-r', 'Kategoria:Piłkarze angielskich klubów')+
# s.makeList('category-r', 'Kategoria:Piłkarze walijskich klubów')
# ).uniq

# list.delete_if{|i| i=~/^Kategoria:/}

# File.open('lista-pilkarze.txt','w').write list.join("\n")
# list=File.open('lista-pilkarze.txt').read.split(/\r?\n/)
list=['Wikipedysta:Matma Rex/brudnopis']

puts 'Done!'
puts ''

$notfound=[]
$same=[]
$diff=[]

# A {{soccerbase}} template call (capture 1) or a direct link to a player's
# details page (capture 2).
soccerbase_id=/\{\{soccerbase.*?(\d+).*?\}\}|soccerbase\.com\/players_details\.sd\?playerid=(\d+)/i

list.each_with_index do |art, i|
  exit if $edits>4 # stop once five pages have been saved

  # finding data
  puts "* [[#{art}]]"
  pPl=Page.new(art, 'pl')
  pPl.read=~/\[\[en:([^\]]+)\]\]/
  if $1
    artEn=$1
    puts "** Interwiki-en: [[:en:#{artEn}]]"
  else
    artEn=art
    puts "** No interwiki; guessing [[:en:#{art}]]"
  end

  pPl.read=~soccerbase_id
  if $1||$2
    soccid=$1||$2
    url="http://www.soccerbase.com/players_details.sd?playerid=#{soccid}"
    puts '** Found id on plwiki'
  else
    # The English article is looked up under its own title (the interwiki
    # target found above), not under the Polish one.
    pEn=Page.new(artEn, 'en')
    pEn.read=~soccerbase_id
    if $1||$2
      soccid=$1||$2
      url="http://www.soccerbase.com/players_details.sd?playerid=#{soccid}"
      puts '** Found id on enwiki'
    else
      url=searchForPlayer(art)||searchForPlayer(artEn)
    end
  end

  if url==nil
    puts '** Not found.'
    $notfound<<art
  else
    data=getPlayerData url
    puts "** URL: #{url}"
    unless data.empty?
      puts "** Found info on soccerbase."
    else
      puts '** Found, but no data.'
      $notfound<<art
    end
  end

  pPl.read =~ /występy\(gole\)\s*=(.+)/
  if $1==nil
    puts '** Wiki: error. No infobox?'
  else
    a=$1.split(/\s*<br.*?>\s*/)[-1].to_s.strip
    a=~/(\d+)\s*\((\d+)\)/
    matchesW, goalsW = $1.to_i, $2.to_i
    puts "** Wiki info: #{matchesW} matches, #{goalsW} goals."
  end

  saveData if i%30==0 && i!=0

  # $change=File.open('changelist.txt','w')
  # $change.sync=true

  # editing
  # Pages without any soccerbase data are left alone; editing them would only
  # reformat the infobox and refresh its date stamp without new information.
  if data && !data.empty?
    #$change.puts "* [[#{art}]] - #{matchesW}/#{goalsW} -> #{matches}/#{goals}"

    pPl.text=~/(kluby\s*=\s*)([^\|]+)(\s*\|)/
    kluby=$2
    pPl.text=~/(występy\(gole\)\s*=\s*)([^\|]+)(\s*\|)/
    wystepygole=$2

    if kluby.nil? || wystepygole.nil?
      puts '** Wiki: error. No club list in infobox?'
      next
    end

    # resolve maps the plain club name back to the original wikitext entry.
    resolve={}
    kluby=kluby.split(/<\/?br[^>]*>/).map do |entry|
      short=entry.strip.gsub(/\[\[(?:[^\]\|]+\||)([^\]\|]+)\]\]/,'\1').gsub(/→|\(wyp\.\)/,'').strip
      resolve[short]=entry.strip
      short
    end
    wystepygole=wystepygole.split(/<\/?br[^>]*?>/).map{|entry| entry.strip}
    wystepygole.delete_if{|entry| entry==''}
    kluby.delete_if{|entry| entry==''}

    # Make both lists the same length; missing entries count as "0 (0)".
    wystepygole.pop while wystepygole.length>kluby.length
    wystepygole.push '0 (0)' while wystepygole.length<kluby.length

    wikidata=OrderedHash.new
    kluby.each_index do |idx|
      wystepygole[idx]=~/(\d+)\s*\((\d+)\)/
      wikidata[kluby[idx]]=[$1.to_i, $2.to_i]
    end

    # puts data.inspect
    # puts wikidata.inspect

    # Pair every soccerbase club with the best matching club from the
    # infobox: a substring match wins outright, otherwise the smallest
    # Levenshtein distance to the soccerbase name or its id2team alias.
    data.each_pair do |scbclub, (_matches, _goals, teamid)|
      alias_name=id2team[teamid] # nil when the team id is not in the table
      best=nil # [distance, wiki club name]
      wikidata.each_pair do |wikiclub, _wiki|
        if wikiclub.index(scbclub) || scbclub.index(wikiclub) ||
           (alias_name && (wikiclub.index(alias_name) || alias_name.index(wikiclub)))
          best=[0, wikiclub]
          break
        end

        [scbclub, alias_name].compact.each do |name|
          d=Levenshtein.distance(name, wikiclub)
          best=[d, wikiclub] if best.nil? || d<best[0]
        end
      end
      next unless best # the infobox lists no clubs at all

      wikidata[best[1]]=data[scbclub]
    end

    infoboxwystepygole=[]
    infoboxkluby=[]

    wikidata.each do |club, info|
      infoboxkluby<<resolve[club]
      infoboxwystepygole<<"#{info[0]} (#{info[1]})"
    end

    infoboxkluby=infoboxkluby.join('<br />')
    infoboxwystepygole=infoboxwystepygole.join('<br />')

    pPl.text=pPl.text.sub(/(występy\(gole\)\s*=\s*)([^\|]+?)(\s*\|)/){$1+infoboxwystepygole+$3}
    pPl.text=pPl.text.sub(/(kluby\s*=\s*)([^\|]+?)(\s*\|)/){$1+infoboxkluby+$3}
    pPl.text=pPl.text.sub(/(data1\s*=\s*)([^\|]+?)(\s*\|)/, '\1{{subst:CURRENTDAY}} {{subst:CURRENTMONTHNAMEGEN}} {{subst:CURRENTYEAR}}\3')

    $edits+=1
    pPl.save
  end
end
|
339
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/ruby
# CGI script: shows a form asking for a page title and, when a title is
# given, prints the list of distinct users found in that page's revisions.
print "Content-type: text/html; charset=utf-8\r\n\r\n"
$stderr=$stdout

require 'sunflower-core.rb'

s=Sunflower.new('pl.wikipedia.org')
s.log=false
s.login
cgi=CGI.new


puts ''
puts '<p>Get list for: <form action="author-list.rb" method="GET"><input name="title"> <input type="submit" value="Go!"></form></p>'

title=cgi['title'].to_s
unless title.empty?
  # The title comes straight from the request and the user names from the
  # wiki; both are escaped before being written into the HTML output.
  puts '<p>List of authors of '+CGI.escapeHTML(title)+':</p>'

  users=[]
  base="action=query&prop=revisions&titles=#{CGI.escape(title)}&rvprop=user&rvlimit=5000"
  query=base
  # Keep asking for the next batch for as long as the API reports a
  # continuation point.
  while query
    hash=s.API(query)
    page=hash['query']['pages'].values[0]
    # A page that does not exist has no 'revisions' entry.
    ((page && page['revisions']) || []).each do |r|
      users<<r['user']
    end

    continue=hash['query-continue']
    query=continue && "#{base}&rvstartid=#{continue['revisions']['rvstartid']}"
  end

  users.uniq!


  puts '<ul><li>'+users.map{|u| CGI.escapeHTML(u.to_s)}.join('</li><li>')+'</li></ul>'
end
|
36
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'sunflower-commontasks.rb'
|
2
|
+
require 'sunflower-listmaker.rb'
|
3
|
+
|
4
|
+
# Replaces small inline uses of one image with another image at 20px on
# every page that uses it.
image='Plik:Obiekt zabytkowy znak.svg'

# EDIT WIKI URL BELOW
s=Sunflower.new('pl.wikipedia.org')

print "Logging in to #{s.wikiURL}... "
# EDIT USERNAME AND PASSWORD BELOW
s.login
print "done!\n"

print "Reading articles list... "
# EDIT FILENAME BELOW
list=s.make_list('image', image).sort
print "done!\n\n"

# EDIT SUMMARY BELOW
$summary='podmiana grafiki, [[WP:SK]]'

list.each do |title|
  print "Reading page #{title}... "
  page=Page.get(title)
  print "done.\n"
  print "Modifying... "

  page.codeCleanup

  # Matches the image at 10-69px, optionally with "left" and one more
  # parameter (captured), plus trailing spaces and at most one line break.
  page.text.gsub!(/\[\[#{Regexp.escape image} *\|(?:left\||)[1-6]\dpx(?:\|left|)(\|[^\]\|]+|)\]\]( *(?:\r?\n|) *|)/) do
    # Thumbnails and right-aligned images are left exactly as they are. The
    # matched text has to be handed back explicitly: a bare `next` makes the
    # block return nil, which gsub! inserts as an empty string, deleting
    # the image from the page.
    next $~[0] if $~[0].index('thumb') || $~[0].index('right')
    "[[Plik:Obiekt zabytkowy.svg|20px#{$1}]] "
  end

  print "done.\n"
  print "Saving... "
  # Changes that differ only in letter case are not worth an edit.
  page.save unless page.orig_text.downcase==page.text.downcase
  print "done!\n\n"
end

print 'Finished! Press any key to close.'
gets
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require "algorithm/diff"
|
2
|
+
require 'sunflower-commontasks.rb'
|
3
|
+
require 'sunflower-listmaker.rb'
|
4
|
+
|
5
|
+
# EDIT WIKI URL BELOW
|
6
|
+
# Removes bold markup (''') from section headings of every page named in
# list2.txt and saves the pages that changed.

# EDIT WIKI URL BELOW
bot=Sunflower.new

print "Logging in to #{bot.wikiURL}... "
# EDIT USERNAME AND PASSWORD BELOW
bot.login
print "done!\n"

print "Reading articles list... "
# EDIT FILENAME BELOW
titles=bot.make_list('file', 'list2.txt')
print "done!\n\n"

# EDIT SUMMARY BELOW
$summary='usuwanie pogrubień z nagłówków, [[WP:SK]]'

# Rebuilds a heading from its marker ("==", "===", ...) and its text with
# every ''' removed.
unbold=lambda{|marker, heading| "#{marker} #{heading.gsub("'''", '')} #{marker}"}

titles.each do |title|
  print "Reading page #{title}... "
  page=Page.get(title)
  print "done.\n"
  print "Modifying... "

  before=page.read

  # simplest fix: the whole heading text is wrapped in bold
  page.replace(/(==+)\s*'''\s*(.+?)\s*'''\s*\1/, '\1 \2 \1')
  # broken bolds - opened, but not closed, remove them
  page.replace(/(==+)([^']*)'''([^']*)\1/, '\1\2\3\1')
  # pl.wiki specific: headings listing people bearing a given name
  page.write page.text.gsub(/(==+)\s*(Znan.+? (?:osoby|ludzie) (?:nosz|o imien).+?)\s*\1/){unbold.call($1, $2)}
  # pl.wiki specific: name day headings
  page.write page.text.gsub(/(==+)\s*(.+?(?:\[\[imieniny\]\]|imieniny) obchodzi)\s*\1/){unbold.call($1, $2)}

  if page.read==before
    print "No changes.\n\n"
    next
  end

  page.codeCleanup

  # diffs = oldtxt.diff(page.read)

  # puts diffs
  # gets

  print "done.\n"
  print "Saving... "
  page.save
  print "done!\n\n"
end

print 'Finished! Press any key to close.'
gets
|