generalscraper 0.0.25 → 0.0.26

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/parse_page.rb +4 -3
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c1cde72e1377f7ff78be4cd057adcf570b21e011
4
- data.tar.gz: 5cf7d644b1ef9a4c4d413b3127bb0bacb830b5f2
3
+ metadata.gz: 85a128c0ae855a52ee3842ba40807b7adc84a43a
4
+ data.tar.gz: c7b8aa3648f3735c0f69ac7f91884bbd04a2450a
5
5
  SHA512:
6
- metadata.gz: 3664beea7f410638454dc7112a28fa39fd8b5209d67ad926dfd6bb2ac3666d15dee430a9baccf4c91ece14aaf41aebcff1b5c77c65cef56ec33e23e07bf928b6
7
- data.tar.gz: 7cbb4e5f824998fd39f272070d1f8f75bbc77b53bf8afb8623e6293782b40a6c4bc08e4b102e74f64445184a29f62aa82bc4bfdc105e75e4a2da2b261fb06705
6
+ metadata.gz: 54c19ac3c90c3b99b9be0115945c1f59ad71951285a5d779090f4ec91c3e4fd2fbf995c4393a27cc0d73d0efee515d62182b2851cb6205f7f4dfe2a0f17fb84d
7
+ data.tar.gz: 0ea35567e7c33f9b46ab678f2bf3231d8a7b79a883e749728de0eaa9d46e772e91cc4346bc4ce3c8ff4f9a4855b0acd06cc338e63920c1228a7e5b2ef5f36837
data/lib/parse_page.rb CHANGED
@@ -31,11 +31,12 @@ module ParsePage
31
31
 
32
32
  # Download and extract text from PDF
33
33
  def getPDF(url, pagehash)
34
- `wget --tries=2 -P public/uploads #{url}`
35
34
  path = url.split("/")
36
-
35
+ filename = path[path.length-1].chomp.strip.gsub(" ", "_").gsub("%20", "_")
36
+ `wget --tries=2 #{url} -O public/uploads/#{filename}`
37
+
37
38
  # OCR PDF and save fields
38
- u = UploadConvert.new("public/uploads/" + path[path.length-1].chomp.strip)
39
+ u = UploadConvert.new("public/uploads/" + filename)
39
40
  pdfparse = JSON.parse(u.handleDoc)
40
41
  pdfparse.each{|k, v| pagehash[k] = fixEncode(v)}
41
42
  return pagehash
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: generalscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.25
4
+ version: 0.0.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath