sunflower 0.3 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +1 -1
- data/lib/sunflower/commontasks.rb +265 -250
- data/lib/sunflower/core.rb +288 -287
- data/lib/sunflower/listmaker.rb +160 -152
- data/scripts/ZDBOT.rb +61 -61
- data/scripts/aktualizacjapilkarzy.rb +339 -339
- data/scripts/changeimage.rb +41 -41
- data/scripts/fix-bold-in-headers.rb +41 -53
- data/scripts/fix-double-pipes.rb +30 -49
- data/scripts/fix-langs.rb +42 -42
- data/scripts/fix-multiple-same-refs.rb +101 -101
- data/scripts/fix-some-entities.rb +36 -43
- data/scripts/fix-unicode-control-chars.rb +30 -51
- data/scripts/insight.rb +132 -132
- data/scripts/lekkoatl-portal.rb +50 -50
- data/scripts/make-id2team-list.rb +31 -31
- data/scripts/recat.rb +27 -32
- data/scripts/wanted.rb +72 -72
- metadata +40 -62
@@ -1,51 +1,30 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
print "
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
page.codeCleanup
|
33
|
-
|
34
|
-
print "done.\n"
|
35
|
-
print "Saving... "
|
36
|
-
page.save
|
37
|
-
print "done!\n\n"
|
38
|
-
rescue
|
39
|
-
failcounter+=1
|
40
|
-
if failcounter<5
|
41
|
-
print "#{failcounter}th error, retrying!\n"
|
42
|
-
redo
|
43
|
-
else
|
44
|
-
print "#{failcounter}th error!\n\n"
|
45
|
-
next
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
print 'Finished! Press any key to close.'
|
51
|
-
gets
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'sunflower'
|
4
|
+
s = Sunflower.new.login
|
5
|
+
|
6
|
+
url = 'http://toolserver.org/~sk/cgi-bin/checkwiki/checkwiki.cgi?project=plwiki&view=bots&id=16&offset=0&limit=2500'
|
7
|
+
|
8
|
+
print "Reading articles list... "
|
9
|
+
# EDIT FILENAME BELOW
|
10
|
+
str=Net::HTTP.get(URI.parse(url))
|
11
|
+
list=str[(str.index('<pre>')+5)...(str.index('</pre>'))].strip.split(/\r?\n/).uniq
|
12
|
+
print "done (#{list.length} to do)!\n\n"
|
13
|
+
|
14
|
+
# EDIT SUMMARY BELOW
|
15
|
+
s.summary='poprawa znaków kontrolnych Unicode, [[WP:SK]]'
|
16
|
+
|
17
|
+
list.each do |title|
|
18
|
+
print "Reading page #{title}... "
|
19
|
+
page=Page.new(title)
|
20
|
+
print "done.\n"
|
21
|
+
print "Modifying... "
|
22
|
+
|
23
|
+
page.replace(/||/, "")
|
24
|
+
page.code_cleanup unless page.orig_text==page.text
|
25
|
+
|
26
|
+
print "done.\n"
|
27
|
+
print "Saving... "
|
28
|
+
page.save
|
29
|
+
print "done!\n\n"
|
30
|
+
end
|
data/scripts/insight.rb
CHANGED
@@ -1,133 +1,133 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
require 'uri'
|
3
|
-
require 'cgi'
|
4
|
-
include Net
|
5
|
-
|
6
|
-
require 'sunflower-core.rb'
|
7
|
-
s=Sunflower.new
|
8
|
-
s.login
|
9
|
-
|
10
|
-
przejrzane=Page.new 'Wikipedysta:PMG/przejrzane'
|
11
|
-
t=przejrzane.read
|
12
|
-
|
13
|
-
|
14
|
-
begin
|
15
|
-
f=File.open('puredata.txt','r')
|
16
|
-
pd1=f.read.split(/\r?\n/)
|
17
|
-
f.close
|
18
|
-
rescue
|
19
|
-
pd1=[]
|
20
|
-
end
|
21
|
-
|
22
|
-
pdv=[]
|
23
|
-
pdn=[]
|
24
|
-
pd1.each do |i|
|
25
|
-
n=i.split('|')
|
26
|
-
n[1]='' if n[1]==nil
|
27
|
-
|
28
|
-
pdn<<n[0]
|
29
|
-
pdv<<n[1]
|
30
|
-
end
|
31
|
-
|
32
|
-
f=File.open('puredata.txt','w')
|
33
|
-
|
34
|
-
$counter=0
|
35
|
-
nt=t.sub(/(\{\| class="wikitable sortable"(?:\s*!.+)+)\s*\|-([\s\S]+?)(\|\})/){
|
36
|
-
nl="\n" #shorten
|
37
|
-
before=$1
|
38
|
-
after=$3
|
39
|
-
|
40
|
-
data=$2.split("|-\n|")
|
41
|
-
data=data.map{|i|
|
42
|
-
i.strip.split(/\s*\|\s*/)
|
43
|
-
}
|
44
|
-
|
45
|
-
data2=[]
|
46
|
-
for d in data
|
47
|
-
d.shift if d[0].strip==''
|
48
|
-
|
49
|
-
# load puredata, if possible, and skip the rest
|
50
|
-
# if pd[d[-1]]!=nil #&& pd[d[-1]]!=''
|
51
|
-
i=pdn.index(d[-1])
|
52
|
-
if i!=nil && pdv[pdn.index(d[-1])]!=nil
|
53
|
-
puts d[-1]+': (datafile)'
|
54
|
-
puts ' '+pdv[i]
|
55
|
-
|
56
|
-
last=d[-1]
|
57
|
-
d[-1]=pdv[i]
|
58
|
-
d<<last
|
59
|
-
|
60
|
-
data2<<d
|
61
|
-
$counter=0
|
62
|
-
|
63
|
-
#rewrite puredata
|
64
|
-
f.write(d[-1]+'|'+pdv[i]+nl)
|
65
|
-
f.flush
|
66
|
-
|
67
|
-
next #skip the rest of loop
|
68
|
-
end
|
69
|
-
|
70
|
-
url=d[0].sub(/^\*+ \[(http:[^ ]+) [^\]]+\]$/,'\1')
|
71
|
-
|
72
|
-
#puts url
|
73
|
-
puts d[-1]+':' if $counter==0
|
74
|
-
|
75
|
-
begin
|
76
|
-
res=HTTP.get(URI.parse(url.sub(/&category=([^&]+)/){'&category='+CGI.escape($1)}))
|
77
|
-
|
78
|
-
f2=File.open('last.txt','w')
|
79
|
-
f2.write(res)
|
80
|
-
f2.close
|
81
|
-
|
82
|
-
res=~/Znaleziono (\d+) nieprzejrza/
|
83
|
-
num=$1
|
84
|
-
rescue Timeout::Error
|
85
|
-
num=nil
|
86
|
-
$counter+=3 #repeat only once
|
87
|
-
end
|
88
|
-
|
89
|
-
if num==nil
|
90
|
-
$counter+=1
|
91
|
-
if $counter<5
|
92
|
-
puts 'Retrying...'
|
93
|
-
redo
|
94
|
-
else
|
95
|
-
num=''
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
puts ' '+num
|
100
|
-
|
101
|
-
last=d[-1]
|
102
|
-
d[-1]=num
|
103
|
-
d<<last
|
104
|
-
|
105
|
-
data2<<d
|
106
|
-
$counter=0
|
107
|
-
|
108
|
-
#write puredata to file
|
109
|
-
f.write(d[-1]+'|'+num+nl)
|
110
|
-
f.flush
|
111
|
-
end
|
112
|
-
|
113
|
-
data3=nl
|
114
|
-
for d in data2
|
115
|
-
data3+='|-'+nl+'|'+d.join(nl+'|')+nl
|
116
|
-
end
|
117
|
-
|
118
|
-
months=%w(zero stycznia lutego marca kwietnia maja czerwca lipca sierpnia września października listopada grudnia)
|
119
|
-
d=Date.parse(Time.now.to_s)
|
120
|
-
|
121
|
-
before.sub(/(!Link do kategorii)/,'!'+d.day.to_s+' '+months[d.month]+' '+d.year.to_s+nl+'\1')+data3+after #print it out
|
122
|
-
}
|
123
|
-
|
124
|
-
|
125
|
-
f.close #puredata.txt
|
126
|
-
|
127
|
-
f=File.open('data2.txt','w')
|
128
|
-
f.write(nt)
|
129
|
-
f.close
|
130
|
-
|
131
|
-
$summary='aktualizacja'
|
132
|
-
przejrzane.write nt
|
1
|
+
require 'net/http'
|
2
|
+
require 'uri'
|
3
|
+
require 'cgi'
|
4
|
+
include Net
|
5
|
+
|
6
|
+
require 'sunflower-core.rb'
|
7
|
+
s=Sunflower.new
|
8
|
+
s.login
|
9
|
+
|
10
|
+
przejrzane=Page.new 'Wikipedysta:PMG/przejrzane'
|
11
|
+
t=przejrzane.read
|
12
|
+
|
13
|
+
|
14
|
+
begin
|
15
|
+
f=File.open('puredata.txt','r')
|
16
|
+
pd1=f.read.split(/\r?\n/)
|
17
|
+
f.close
|
18
|
+
rescue
|
19
|
+
pd1=[]
|
20
|
+
end
|
21
|
+
|
22
|
+
pdv=[]
|
23
|
+
pdn=[]
|
24
|
+
pd1.each do |i|
|
25
|
+
n=i.split('|')
|
26
|
+
n[1]='' if n[1]==nil
|
27
|
+
|
28
|
+
pdn<<n[0]
|
29
|
+
pdv<<n[1]
|
30
|
+
end
|
31
|
+
|
32
|
+
f=File.open('puredata.txt','w')
|
33
|
+
|
34
|
+
$counter=0
|
35
|
+
nt=t.sub(/(\{\| class="wikitable sortable"(?:\s*!.+)+)\s*\|-([\s\S]+?)(\|\})/){
|
36
|
+
nl="\n" #shorten
|
37
|
+
before=$1
|
38
|
+
after=$3
|
39
|
+
|
40
|
+
data=$2.split("|-\n|")
|
41
|
+
data=data.map{|i|
|
42
|
+
i.strip.split(/\s*\|\s*/)
|
43
|
+
}
|
44
|
+
|
45
|
+
data2=[]
|
46
|
+
for d in data
|
47
|
+
d.shift if d[0].strip==''
|
48
|
+
|
49
|
+
# load puredata, if possible, and skip the rest
|
50
|
+
# if pd[d[-1]]!=nil #&& pd[d[-1]]!=''
|
51
|
+
i=pdn.index(d[-1])
|
52
|
+
if i!=nil && pdv[pdn.index(d[-1])]!=nil
|
53
|
+
puts d[-1]+': (datafile)'
|
54
|
+
puts ' '+pdv[i]
|
55
|
+
|
56
|
+
last=d[-1]
|
57
|
+
d[-1]=pdv[i]
|
58
|
+
d<<last
|
59
|
+
|
60
|
+
data2<<d
|
61
|
+
$counter=0
|
62
|
+
|
63
|
+
#rewrite puredata
|
64
|
+
f.write(d[-1]+'|'+pdv[i]+nl)
|
65
|
+
f.flush
|
66
|
+
|
67
|
+
next #skip the rest of loop
|
68
|
+
end
|
69
|
+
|
70
|
+
url=d[0].sub(/^\*+ \[(http:[^ ]+) [^\]]+\]$/,'\1')
|
71
|
+
|
72
|
+
#puts url
|
73
|
+
puts d[-1]+':' if $counter==0
|
74
|
+
|
75
|
+
begin
|
76
|
+
res=HTTP.get(URI.parse(url.sub(/&category=([^&]+)/){'&category='+CGI.escape($1)}))
|
77
|
+
|
78
|
+
f2=File.open('last.txt','w')
|
79
|
+
f2.write(res)
|
80
|
+
f2.close
|
81
|
+
|
82
|
+
res=~/Znaleziono (\d+) nieprzejrza/
|
83
|
+
num=$1
|
84
|
+
rescue Timeout::Error
|
85
|
+
num=nil
|
86
|
+
$counter+=3 #repeat only once
|
87
|
+
end
|
88
|
+
|
89
|
+
if num==nil
|
90
|
+
$counter+=1
|
91
|
+
if $counter<5
|
92
|
+
puts 'Retrying...'
|
93
|
+
redo
|
94
|
+
else
|
95
|
+
num=''
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
puts ' '+num
|
100
|
+
|
101
|
+
last=d[-1]
|
102
|
+
d[-1]=num
|
103
|
+
d<<last
|
104
|
+
|
105
|
+
data2<<d
|
106
|
+
$counter=0
|
107
|
+
|
108
|
+
#write puredata to file
|
109
|
+
f.write(d[-1]+'|'+num+nl)
|
110
|
+
f.flush
|
111
|
+
end
|
112
|
+
|
113
|
+
data3=nl
|
114
|
+
for d in data2
|
115
|
+
data3+='|-'+nl+'|'+d.join(nl+'|')+nl
|
116
|
+
end
|
117
|
+
|
118
|
+
months=%w(zero stycznia lutego marca kwietnia maja czerwca lipca sierpnia września października listopada grudnia)
|
119
|
+
d=Date.parse(Time.now.to_s)
|
120
|
+
|
121
|
+
before.sub(/(!Link do kategorii)/,'!'+d.day.to_s+' '+months[d.month]+' '+d.year.to_s+nl+'\1')+data3+after #print it out
|
122
|
+
}
|
123
|
+
|
124
|
+
|
125
|
+
f.close #puredata.txt
|
126
|
+
|
127
|
+
f=File.open('data2.txt','w')
|
128
|
+
f.write(nt)
|
129
|
+
f.close
|
130
|
+
|
131
|
+
$summary='aktualizacja'
|
132
|
+
przejrzane.write nt
|
133
133
|
przejrzane.save
|
data/scripts/lekkoatl-portal.rb
CHANGED
@@ -1,51 +1,51 @@
|
|
1
|
-
require 'sunflower-commontasks.rb'
|
2
|
-
require 'sunflower-listmaker.rb'
|
3
|
-
|
4
|
-
# EDIT WIKI URL BELOW
|
5
|
-
s=Sunflower.new('pl.wikipedia.org')
|
6
|
-
|
7
|
-
print "Logging in to #{s.wikiURL}... "
|
8
|
-
# EDIT USERNAME AND PASSWORD BELOW
|
9
|
-
s.login
|
10
|
-
print "done!\n"
|
11
|
-
|
12
|
-
print "Reading articles list... "
|
13
|
-
# EDIT FILENAME BELOW
|
14
|
-
# list=s.make_list('file', 'lekkoatl.txt')
|
15
|
-
# nice generating:
|
16
|
-
all=s.make_list('categoryr', 'Kategoria:Lekkoatletyka')
|
17
|
-
done=s.make_list('category', 'Kategoria:Wikiprojekt:Lekkoatletyka/hasła')
|
18
|
-
error=done.collect{|a| a.index ':'}
|
19
|
-
done=done.map{|a| (a.index ':' ? '' : 'Dyskusja:'+a)}
|
20
|
-
done.delete_if{|a| a==''}
|
21
|
-
list=all-done
|
22
|
-
print "done!\n\n"
|
23
|
-
|
24
|
-
File.open('err.txt','w'){|f| f.write error.join("\n")}
|
25
|
-
File.open('ok.txt','w'){|f| f.write list.join("\n")}
|
26
|
-
|
27
|
-
# EDIT SUMMARY BELOW
|
28
|
-
$summary='dodanie {{[[Portal:Lekkoatletyka/Info]]}}'
|
29
|
-
|
30
|
-
list.each do |title|
|
31
|
-
print "Reading page #{title}... "
|
32
|
-
page=Page.get('Dyskusja:'+title)
|
33
|
-
print "done.\n"
|
34
|
-
print "Modifying... "
|
35
|
-
|
36
|
-
if page.text.index('Portal:Lekkoatletyka/Info')!=nil
|
37
|
-
page.dump
|
38
|
-
next
|
39
|
-
end
|
40
|
-
|
41
|
-
nl=(page.text.lstrip.index('{{')==0 ? "\n" : "\n\n")
|
42
|
-
page.text='{{Portal:Lekkoatletyka/Info}}'+nl+page.text.lstrip
|
43
|
-
|
44
|
-
print "done.\n"
|
45
|
-
print "Saving... "
|
46
|
-
page.save
|
47
|
-
print "done!\n\n"
|
48
|
-
end
|
49
|
-
|
50
|
-
print 'Finished! Press any key to close.'
|
1
|
+
require 'sunflower-commontasks.rb'
|
2
|
+
require 'sunflower-listmaker.rb'
|
3
|
+
|
4
|
+
# EDIT WIKI URL BELOW
|
5
|
+
s=Sunflower.new('pl.wikipedia.org')
|
6
|
+
|
7
|
+
print "Logging in to #{s.wikiURL}... "
|
8
|
+
# EDIT USERNAME AND PASSWORD BELOW
|
9
|
+
s.login
|
10
|
+
print "done!\n"
|
11
|
+
|
12
|
+
print "Reading articles list... "
|
13
|
+
# EDIT FILENAME BELOW
|
14
|
+
# list=s.make_list('file', 'lekkoatl.txt')
|
15
|
+
# nice generating:
|
16
|
+
all=s.make_list('categoryr', 'Kategoria:Lekkoatletyka')
|
17
|
+
done=s.make_list('category', 'Kategoria:Wikiprojekt:Lekkoatletyka/hasła')
|
18
|
+
error=done.collect{|a| a.index ':'}
|
19
|
+
done=done.map{|a| (a.index ':' ? '' : 'Dyskusja:'+a)}
|
20
|
+
done.delete_if{|a| a==''}
|
21
|
+
list=all-done
|
22
|
+
print "done!\n\n"
|
23
|
+
|
24
|
+
File.open('err.txt','w'){|f| f.write error.join("\n")}
|
25
|
+
File.open('ok.txt','w'){|f| f.write list.join("\n")}
|
26
|
+
|
27
|
+
# EDIT SUMMARY BELOW
|
28
|
+
$summary='dodanie {{[[Portal:Lekkoatletyka/Info]]}}'
|
29
|
+
|
30
|
+
list.each do |title|
|
31
|
+
print "Reading page #{title}... "
|
32
|
+
page=Page.get('Dyskusja:'+title)
|
33
|
+
print "done.\n"
|
34
|
+
print "Modifying... "
|
35
|
+
|
36
|
+
if page.text.index('Portal:Lekkoatletyka/Info')!=nil
|
37
|
+
page.dump
|
38
|
+
next
|
39
|
+
end
|
40
|
+
|
41
|
+
nl=(page.text.lstrip.index('{{')==0 ? "\n" : "\n\n")
|
42
|
+
page.text='{{Portal:Lekkoatletyka/Info}}'+nl+page.text.lstrip
|
43
|
+
|
44
|
+
print "done.\n"
|
45
|
+
print "Saving... "
|
46
|
+
page.save
|
47
|
+
print "done!\n\n"
|
48
|
+
end
|
49
|
+
|
50
|
+
print 'Finished! Press any key to close.'
|
51
51
|
gets
|
@@ -1,32 +1,32 @@
|
|
1
|
-
require 'hpricot'
|
2
|
-
require 'net/http'
|
3
|
-
include Net
|
4
|
-
|
5
|
-
def get(url)
|
6
|
-
return HTTP.get(URI.parse(url))
|
7
|
-
end
|
8
|
-
|
9
|
-
f=File.open('id2team.txt','w')
|
10
|
-
f.sync=true
|
11
|
-
|
12
|
-
((1..4).to_a + (12..15).to_a).each do |i|
|
13
|
-
page=get("http://www.soccerbase.com/teams.sd?competitionid=#{i}")
|
14
|
-
h=Hpricot.parse page
|
15
|
-
|
16
|
-
h2=Hpricot.parse h.search('table table table')[2].inner_html
|
17
|
-
|
18
|
-
h2.search('tr').each do |tr|
|
19
|
-
begin
|
20
|
-
if tr.at('b').inner_html.strip=='Team'||tr.inner_html.index('<script')||tr.inner_html.index('<img')
|
21
|
-
next
|
22
|
-
end
|
23
|
-
rescue Exception
|
24
|
-
end
|
25
|
-
id=tr.at('a')['href'].sub(/\A.+?(\d+)\Z/, '\1')
|
26
|
-
team=tr.at('a').inner_html.strip
|
27
|
-
|
28
|
-
f.puts "#{id}\t#{team}"
|
29
|
-
end
|
30
|
-
|
31
|
-
|
1
|
+
require 'hpricot'
|
2
|
+
require 'net/http'
|
3
|
+
include Net
|
4
|
+
|
5
|
+
def get(url)
|
6
|
+
return HTTP.get(URI.parse(url))
|
7
|
+
end
|
8
|
+
|
9
|
+
f=File.open('id2team.txt','w')
|
10
|
+
f.sync=true
|
11
|
+
|
12
|
+
((1..4).to_a + (12..15).to_a).each do |i|
|
13
|
+
page=get("http://www.soccerbase.com/teams.sd?competitionid=#{i}")
|
14
|
+
h=Hpricot.parse page
|
15
|
+
|
16
|
+
h2=Hpricot.parse h.search('table table table')[2].inner_html
|
17
|
+
|
18
|
+
h2.search('tr').each do |tr|
|
19
|
+
begin
|
20
|
+
if tr.at('b').inner_html.strip=='Team'||tr.inner_html.index('<script')||tr.inner_html.index('<img')
|
21
|
+
next
|
22
|
+
end
|
23
|
+
rescue Exception
|
24
|
+
end
|
25
|
+
id=tr.at('a')['href'].sub(/\A.+?(\d+)\Z/, '\1')
|
26
|
+
team=tr.at('a').inner_html.strip
|
27
|
+
|
28
|
+
f.puts "#{id}\t#{team}"
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
32
|
end
|
data/scripts/recat.rb
CHANGED
@@ -1,32 +1,27 @@
|
|
1
|
-
|
2
|
-
require 'sunflower
|
3
|
-
|
4
|
-
from=
|
5
|
-
to=
|
6
|
-
|
7
|
-
s=Sunflower.new
|
8
|
-
|
9
|
-
print "
|
10
|
-
|
11
|
-
print "
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
page
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
page.
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
print "done!\n\n"
|
29
|
-
end
|
30
|
-
|
31
|
-
print 'Finished! Press any key to close.'
|
32
|
-
gets
|
1
|
+
# coding: utf-8
|
2
|
+
require 'sunflower'
|
3
|
+
|
4
|
+
from = ''
|
5
|
+
to = ''
|
6
|
+
|
7
|
+
s = Sunflower.new.login
|
8
|
+
|
9
|
+
print "Reading articles list... "
|
10
|
+
list=s.make_list('category', 'Kategoria:'+from).sort
|
11
|
+
print "done!\n\n"
|
12
|
+
|
13
|
+
s.summary = ''
|
14
|
+
|
15
|
+
list.each do |title|
|
16
|
+
print "Reading page #{title}... "
|
17
|
+
page=Page.get(title)
|
18
|
+
print "modifying... "
|
19
|
+
|
20
|
+
page.code_cleanup
|
21
|
+
page.change_category from, to
|
22
|
+
|
23
|
+
print "saving... "
|
24
|
+
page.save
|
25
|
+
print "done!\n\n"
|
26
|
+
end
|
27
|
+
|