generalscraper 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/parse_page.rb +3 -3
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 48ee021e7ac6bb45a00308d69003bd6ba379b20b
4
- data.tar.gz: d3b631127266dbfaacaee4eb74c2868e48a1f0c8
3
+ metadata.gz: 21df27ba7416ab3ea410f3c38f0aea43cfa0c5f0
4
+ data.tar.gz: bbdfaa98d9c7c0600dc626b43da0127a43472b36
5
5
  SHA512:
6
- metadata.gz: b15d9ce46f5223be79fca5ba74423c0eab88c03dc3ed1e40baef500d30ab9f15c1f364bfb23244ea1dc741edcd91281b779b4ff1170341f0c534859aa174ff94
7
- data.tar.gz: 149dadfabb77b586164c4213fd58bca33a5de5d0c64af48c04db6f4e47eaf3c5c1563ceaeedd7e9a97c813e7e5b95cc45a671734b8a5d2b78212db0d30d700ed
6
+ metadata.gz: f9c37e1e151b37d4eb231fb22304d9f7868eed8a02cb874aa9968756be0f2ad2f555f36c2e8a02977c353579232c6c7a40ee776236ec425e2bef28959f6ce80a
7
+ data.tar.gz: 57d4a622ed823a0acad91bea00a787e2f341721ed110c3d5a88b93f9c981e267a14d62a0db1c71993a10dd9042d77496833724a5f893123288b15f9d9faf9223
data/lib/parse_page.rb CHANGED
@@ -23,7 +23,7 @@ module ParsePage
23
23
  # Download the page text
24
24
  def getHTMLText(url, pagehash)
25
25
  html = Nokogiri::HTML(getPage(url).body)
26
- pagehash[:text] = html.css("body").text
26
+ pagehash[:text] = html.css("body").text.encode("UTF-8")
27
27
  return pagehash
28
28
  end
29
29
 
@@ -35,7 +35,7 @@ module ParsePage
35
35
  # OCR PDF and save fields
36
36
  u = UploadConvert.new("public/uploads/" + path[path.length-1].chomp.strip)
37
37
  pdfparse = JSON.parse(u.handleDoc)
38
- pdfparse.each{|k, v| pagehash[k] = v}
38
+ pdfparse.each{|k, v| pagehash[k] = v.encode("UTF-8")}
39
39
  return pagehash
40
40
  end
41
41
 
@@ -51,7 +51,7 @@ module ParsePage
51
51
 
52
52
  # Get title and meta tag info
53
53
  html = Nokogiri::HTML(getPage(url).body) # Eventually modify this
54
- pagehash[:title] = html.css("title").text
54
+ pagehash[:title] = html.css("title").text.encode("UTF-8")
55
55
  html.css("meta").each do |m|
56
56
  if m
57
57
  pagehash[m['name']] = m['content']
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: generalscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath