docmago_client 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/docmago_client/html_resource_archiver.rb +17 -5
- data/lib/docmago_client/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d6d8973a0f28f504ec927f92b7aac8809c177796
|
4
|
+
data.tar.gz: 27d1c2392e28e334e0cd6a930ab8f78a13b672b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a6323b2eab8a3b786b97f4a295e64cda4bec417d3f277f2731b33599facf32d6e65dc4d136d07191fdabcfa380f48f43f037c8e7951d000df12f5b903d004f69
|
7
|
+
data.tar.gz: 5929d731a6ec1e56ae8af0e84519b2c1fa2f06341852df0647b8bf3aa83b2008639fdf1e668d3b634e37ac143eff74b39e58d35a0da7aef601bb07e76dbe84f7
|
@@ -1,7 +1,16 @@
|
|
1
|
+
require 'digest'
|
2
|
+
require 'open-uri'
|
1
3
|
require 'addressable/uri'
|
2
4
|
require 'nokogiri'
|
3
5
|
require 'zip'
|
4
6
|
|
7
|
+
# Reopens URI::Parser and overrides #split so that URL splitting is delegated
# to Addressable::URI.parse.
# NOTE(review): this patches a standard-library class, so the override is
# visible to every caller of URI::Parser#split in the process — confirm that
# is intended before relying on it.
class URI::Parser
  # Splits +url+ into a nine-element component array.
  #
  # @param url the value handed unchanged to Addressable::URI.parse
  # @return [Array] scheme, userinfo, host, port, nil, path, nil, query,
  #   fragment — in that order. The two nil slots sit where the standard
  #   library's URI.split documents "registry" and "opaque"; they are never
  #   populated here.
  def split(url)
    parsed = Addressable::URI.parse(url)

    [
      parsed.scheme,
      parsed.userinfo,
      parsed.host,
      parsed.port,
      nil, # registry slot, always empty
      parsed.path,
      nil, # opaque slot, always empty
      parsed.query,
      parsed.fragment
    ]
  end
end
|
13
|
+
|
5
14
|
module DocmagoClient
|
6
15
|
class HTMLResourceArchiver
|
7
16
|
def initialize(html, base_path='.')
|
@@ -15,14 +24,17 @@ module DocmagoClient
|
|
15
24
|
zipfile.get_output_stream("document.html") { |f| f.write @html }
|
16
25
|
|
17
26
|
fetch_uris.each do |uri|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
27
|
+
uri = Addressable::URI.parse uri.to_s.strip
|
28
|
+
path_digest = Digest::MD5.hexdigest(normalize_uri(uri))
|
29
|
+
|
30
|
+
file_data = open(uri).read if uri.absolute?
|
31
|
+
file_data ||= File.read(resolve_uri(uri)) if File.exists?(resolve_uri(uri))
|
32
|
+
|
33
|
+
zipfile.get_output_stream(path_digest) { |f| f.write file_data } if file_data
|
22
34
|
end
|
23
35
|
end
|
24
36
|
|
25
|
-
|
37
|
+
file_path
|
26
38
|
end
|
27
39
|
|
28
40
|
private
|