generalscraper 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/parse_page.rb +3 -3
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 48ee021e7ac6bb45a00308d69003bd6ba379b20b
4
- data.tar.gz: d3b631127266dbfaacaee4eb74c2868e48a1f0c8
3
+ metadata.gz: 21df27ba7416ab3ea410f3c38f0aea43cfa0c5f0
4
+ data.tar.gz: bbdfaa98d9c7c0600dc626b43da0127a43472b36
5
5
  SHA512:
6
- metadata.gz: b15d9ce46f5223be79fca5ba74423c0eab88c03dc3ed1e40baef500d30ab9f15c1f364bfb23244ea1dc741edcd91281b779b4ff1170341f0c534859aa174ff94
7
- data.tar.gz: 149dadfabb77b586164c4213fd58bca33a5de5d0c64af48c04db6f4e47eaf3c5c1563ceaeedd7e9a97c813e7e5b95cc45a671734b8a5d2b78212db0d30d700ed
6
+ metadata.gz: f9c37e1e151b37d4eb231fb22304d9f7868eed8a02cb874aa9968756be0f2ad2f555f36c2e8a02977c353579232c6c7a40ee776236ec425e2bef28959f6ce80a
7
+ data.tar.gz: 57d4a622ed823a0acad91bea00a787e2f341721ed110c3d5a88b93f9c981e267a14d62a0db1c71993a10dd9042d77496833724a5f893123288b15f9d9faf9223
data/lib/parse_page.rb CHANGED
@@ -23,7 +23,7 @@ module ParsePage
23
23
  # Download the page text
24
24
  def getHTMLText(url, pagehash)
25
25
  html = Nokogiri::HTML(getPage(url).body)
26
- pagehash[:text] = html.css("body").text
26
+ pagehash[:text] = html.css("body").text.encode("UTF-8")
27
27
  return pagehash
28
28
  end
29
29
 
@@ -35,7 +35,7 @@ module ParsePage
35
35
  # OCR PDF and save fields
36
36
  u = UploadConvert.new("public/uploads/" + path[path.length-1].chomp.strip)
37
37
  pdfparse = JSON.parse(u.handleDoc)
38
- pdfparse.each{|k, v| pagehash[k] = v}
38
+ pdfparse.each{|k, v| pagehash[k] = v.encode("UTF-8")}
39
39
  return pagehash
40
40
  end
41
41
 
@@ -51,7 +51,7 @@ module ParsePage
51
51
 
52
52
  # Get title and meta tag info
53
53
  html = Nokogiri::HTML(getPage(url).body) # Eventually modify this
54
- pagehash[:title] = html.css("title").text
54
+ pagehash[:title] = html.css("title").text.encode("UTF-8")
55
55
  html.css("meta").each do |m|
56
56
  if m
57
57
  pagehash[m['name']] = m['content']
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: generalscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath