fromgit-url2mhtml 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+
2
+ <?xml version="1.0" encoding="iso-8859-1"?>
3
+ <!DOCTYPE html
4
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
6
+
7
+ <!--
8
+
9
+ Methods
10
+
11
+ -->
12
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
13
+ <head>
14
+ <title>Methods</title>
15
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
16
+ <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
17
+ <base target="docwin" />
18
+ </head>
19
+ <body>
20
+ <div id="index">
21
+ <h1 class="section-bar">Methods</h1>
22
+ <div id="index-entries">
23
+ <a href="classes/Url2mhtml.html#M000004">append_relative_contents (Url2mhtml)</a><br />
24
+ <a href="classes/Url2mhtml.html#M000008">capture (Url2mhtml)</a><br />
25
+ <a href="classes/Url2mhtml.html#M000007">create_mail (Url2mhtml)</a><br />
26
+ <a href="classes/Url2mhtml.html#M000006">create_mail_part (Url2mhtml)</a><br />
27
+ <a href="classes/Url2mhtml.html#M000001">get_agent (Url2mhtml)</a><br />
28
+ <a href="classes/Url2mhtml.html#M000002">get_content (Url2mhtml)</a><br />
29
+ <a href="classes/Url2mhtml.html#M000003">get_contents (Url2mhtml)</a><br />
30
+ <a href="classes/Url2mhtml.html#M000005">resolve_relative_uri (Url2mhtml)</a><br />
31
+ <a href="classes/Url2mhtmlTest.html#M000009">setup (Url2mhtmlTest)</a><br />
32
+ <a href="classes/Url2mhtmlTest.html#M000010">teardown (Url2mhtmlTest)</a><br />
33
+ <a href="classes/Url2mhtmlTest.html#M000011">test_capture (Url2mhtmlTest)</a><br />
34
+ </div>
35
+ </div>
36
+ </body>
37
+ </html>
data/doc/index.html ADDED
@@ -0,0 +1,24 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5
+
6
+ <!--
7
+
8
+ RDoc Documentation
9
+
10
+ -->
11
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
12
+ <head>
13
+ <title>RDoc Documentation</title>
14
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
15
+ </head>
16
+ <frameset rows="20%, 80%">
17
+ <frameset cols="25%,35%,45%">
18
+ <frame src="fr_file_index.html" title="Files" name="Files" />
19
+ <frame src="fr_class_index.html" name="Classes" />
20
+ <frame src="fr_method_index.html" name="Methods" />
21
+ </frameset>
22
+ <frame src="files/lib/url2mhtml_rb.html" name="docwin" />
23
+ </frameset>
24
+ </html>
@@ -0,0 +1,208 @@
1
+
2
+ body {
3
+ font-family: Verdana,Arial,Helvetica,sans-serif;
4
+ font-size: 90%;
5
+ margin: 0;
6
+ margin-left: 40px;
7
+ padding: 0;
8
+ background: white;
9
+ }
10
+
11
+ h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
12
+ h1 { font-size: 150%; }
13
+ h2,h3,h4 { margin-top: 1em; }
14
+
15
+ a { background: #eef; color: #039; text-decoration: none; }
16
+ a:hover { background: #039; color: #eef; }
17
+
18
+ /* Override the base stylesheet's Anchor inside a table cell */
19
+ td > a {
20
+ background: transparent;
21
+ color: #039;
22
+ text-decoration: none;
23
+ }
24
+
25
+ /* and inside a section title */
26
+ .section-title > a {
27
+ background: transparent;
28
+ color: #eee;
29
+ text-decoration: none;
30
+ }
31
+
32
+ /* === Structural elements =================================== */
33
+
34
+ div#index {
35
+ margin: 0;
36
+ margin-left: -40px;
37
+ padding: 0;
38
+ font-size: 90%;
39
+ }
40
+
41
+
42
+ div#index a {
43
+ margin-left: 0.7em;
44
+ }
45
+
46
+ div#index .section-bar {
47
+ margin-left: 0px;
48
+ padding-left: 0.7em;
49
+ background: #ccc;
50
+ font-size: small;
51
+ }
52
+
53
+
54
+ div#classHeader, div#fileHeader {
55
+ width: auto;
56
+ color: white;
57
+ padding: 0.5em 1.5em 0.5em 1.5em;
58
+ margin: 0;
59
+ margin-left: -40px;
60
+ border-bottom: 3px solid #006;
61
+ }
62
+
63
+ div#classHeader a, div#fileHeader a {
64
+ background: inherit;
65
+ color: white;
66
+ }
67
+
68
+ div#classHeader td, div#fileHeader td {
69
+ background: inherit;
70
+ color: white;
71
+ }
72
+
73
+
74
+ div#fileHeader {
75
+ background: #057;
76
+ }
77
+
78
+ div#classHeader {
79
+ background: #048;
80
+ }
81
+
82
+
83
+ .class-name-in-header {
84
+ font-size: 180%;
85
+ font-weight: bold;
86
+ }
87
+
88
+
89
+ div#bodyContent {
90
+ padding: 0 1.5em 0 1.5em;
91
+ }
92
+
93
+ div#description {
94
+ padding: 0.5em 1.5em;
95
+ background: #efefef;
96
+ border: 1px dotted #999;
97
+ }
98
+
99
+ div#description h1,h2,h3,h4,h5,h6 {
100
+ color: #125;;
101
+ background: transparent;
102
+ }
103
+
104
+ div#validator-badges {
105
+ text-align: center;
106
+ }
107
+ div#validator-badges img { border: 0; }
108
+
109
+ div#copyright {
110
+ color: #333;
111
+ background: #efefef;
112
+ font: 0.75em sans-serif;
113
+ margin-top: 5em;
114
+ margin-bottom: 0;
115
+ padding: 0.5em 2em;
116
+ }
117
+
118
+
119
+ /* === Classes =================================== */
120
+
121
+ table.header-table {
122
+ color: white;
123
+ font-size: small;
124
+ }
125
+
126
+ .type-note {
127
+ font-size: small;
128
+ color: #DEDEDE;
129
+ }
130
+
131
+ .xxsection-bar {
132
+ background: #eee;
133
+ color: #333;
134
+ padding: 3px;
135
+ }
136
+
137
+ .section-bar {
138
+ color: #333;
139
+ border-bottom: 1px solid #999;
140
+ margin-left: -20px;
141
+ }
142
+
143
+
144
+ .section-title {
145
+ background: #79a;
146
+ color: #eee;
147
+ padding: 3px;
148
+ margin-top: 2em;
149
+ margin-left: -30px;
150
+ border: 1px solid #999;
151
+ }
152
+
153
+ .top-aligned-row { vertical-align: top }
154
+ .bottom-aligned-row { vertical-align: bottom }
155
+
156
+ /* --- Context section classes ----------------------- */
157
+
158
+ .context-row { }
159
+ .context-item-name { font-family: monospace; font-weight: bold; color: black; }
160
+ .context-item-value { font-size: small; color: #448; }
161
+ .context-item-desc { color: #333; padding-left: 2em; }
162
+
163
+ /* --- Method classes -------------------------- */
164
+ .method-detail {
165
+ background: #efefef;
166
+ padding: 0;
167
+ margin-top: 0.5em;
168
+ margin-bottom: 1em;
169
+ border: 1px dotted #ccc;
170
+ }
171
+ .method-heading {
172
+ color: black;
173
+ background: #ccc;
174
+ border-bottom: 1px solid #666;
175
+ padding: 0.2em 0.5em 0 0.5em;
176
+ }
177
+ .method-signature { color: black; background: inherit; }
178
+ .method-name { font-weight: bold; }
179
+ .method-args { font-style: italic; }
180
+ .method-description { padding: 0 0.5em 0 0.5em; }
181
+
182
+ /* --- Source code sections -------------------- */
183
+
184
+ a.source-toggle { font-size: 90%; }
185
+ div.method-source-code {
186
+ background: #262626;
187
+ color: #ffdead;
188
+ margin: 1em;
189
+ padding: 0.5em;
190
+ border: 1px dashed #999;
191
+ overflow: hidden;
192
+ }
193
+
194
+ div.method-source-code pre { color: #ffdead; overflow: hidden; }
195
+
196
+ /* --- Ruby keyword styles --------------------- */
197
+
198
+ .standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
199
+
200
+ .ruby-constant { color: #7fffd4; background: transparent; }
201
+ .ruby-keyword { color: #00ffff; background: transparent; }
202
+ .ruby-ivar { color: #eedd82; background: transparent; }
203
+ .ruby-operator { color: #00ffee; background: transparent; }
204
+ .ruby-identifier { color: #ffdead; background: transparent; }
205
+ .ruby-node { color: #ffa07a; background: transparent; }
206
+ .ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
207
+ .ruby-regexp { color: #ffa07a; background: transparent; }
208
+ .ruby-value { color: #7fffd4; background: transparent; }
data/lib/url2mhtml.rb ADDED
@@ -0,0 +1,106 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+ require 'tmail'
4
+
5
+ class Url2mhtml
6
+ VERSION = '0.0.2'
7
+ ContentInfo=Struct.new(:uri,:type,:body,:is_root,:title)
8
+ # Builds a new WWW::Mechanize agent, sets its user agent alias to 'Windows IE 6' and returns it.
9
+ def self.get_agent
10
+ user_agent_alias ='Windows IE 6'
11
+ agent = WWW::Mechanize.new
12
+ agent.user_agent_alias =user_agent_alias
13
+ agent
14
+ end
15
+ # Fetches +uri+ with a fresh agent and returns two values: the ContentInfo built from the response and the fetched page object.
16
+ def self.get_content(uri,is_root)
17
+ got_content = get_agent.get(uri)
18
+ type=got_content.response['content-type']
19
+ body=got_content.body
20
+ # The page title is used only for a root resource whose content type mentions html; otherwise the title is 'no title'.
21
+ title= ( is_root && /html/.match(type) ) ? got_content.title : 'no title'
22
+
23
+ content_info=ContentInfo.new(uri,type,body,is_root,title)
24
+ return content_info,got_content
25
+ end
26
+ # Fetches +uri+, appends its ContentInfo to +content_info_list+ (mutated in place) and returns that same list.
27
+ def self.get_contents(uri,is_root,content_info_list)
28
+
29
+ content_info,got_content=get_content(uri,is_root)
30
+ content_info_list << content_info
31
+ # Responses whose content type mentions html are scanned for referenced resources, which end up in the same list.
32
+ append_relative_contents(got_content,content_info_list) if /html/.match(content_info.type)
33
+ content_info_list
34
+ end
35
+
36
+ def self.append_relative_contents(page,content_info_list)
37
+ base_uri=page.uri
38
+ raw_image_uris=page.search('//img').map{|i| i['src']}
39
+ raw_image_uris.push(*(page.search('//body').find_all{|i| i['background']}.map{|i| i['background']}))
40
+ raw_image_uris.push(*(page.search('//th').find_all{|i| i['background']}.map{|i| i['background']}))
41
+ raw_image_uris.push(*(page.search('//td').find_all{|i| i['background']}.map{|i| i['background']}))
42
+ image_uris=raw_image_uris.map{|i| resolve_relative_uri(base_uri,i)}.grep(/^(http|ftp)/)
43
+
44
+ raw_frame_uris=page.frames.map{|f| f.uri}
45
+ frame_uris=raw_frame_uris.map{|i| resolve_relative_uri(base_uri,i)}.grep(/^(http|ftp)/)
46
+
47
+ raw_iframe_uris=page.iframes.map{|f| f.uri}
48
+ iframe_uris=raw_iframe_uris.map{|i| resolve_relative_uri(base_uri,i)}.grep(/^(http|ftp)/)
49
+
50
+ raw_css_uris=page.search('link[@rel="stylesheet"]').map{|l| l['href']}
51
+ css_uris=raw_css_uris.map{|i| resolve_relative_uri(base_uri,i)}.grep(/^(http|ftp)/)
52
+
53
+ raw_script_uris=page.search('script').find_all{|s| s['src']}.map{|s| s['src']}
54
+ script_uris=raw_script_uris.map{|i| resolve_relative_uri(base_uri,i)}.grep(/^(http|ftp)/)
55
+
56
+
57
+ raw_urls = ( image_uris + frame_uris + iframe_uris + css_uris + script_uris )
58
+ target_content_urls = raw_urls.map{|u| u.gsub(/#.*/,'')}.uniq.find_all{|u| content_info_list.any?{|content| u != content.uri}}
59
+
60
+ target_content_urls.each{|uri| get_contents(uri,false,content_info_list)}
61
+ end
62
+ # Resolves +target_uri+ against +base_uri+ (converted with to_s) using URI.join and returns the result as a String.
63
+ def self.resolve_relative_uri(base_uri,target_uri)
64
+ URI.join(base_uri.to_s,target_uri).to_s
65
+ end
66
+
67
+ def self.create_mail_part(content)
68
+ part=TMail::Mail.new
69
+
70
+ part['content-location']=content.uri
71
+ part.content_type = content.type
72
+ if /html/.match(content.type)
73
+ part.transfer_encoding = '8bit'
74
+ part.body = content.body
75
+ else
76
+ part.transfer_encoding = 'base64'
77
+ b64encoded_body = [content.body].pack('m').chomp.gsub(/.{76}/, "\\1\n")
78
+ part.body = b64encoded_body
79
+ end
80
+ part
81
+ end
82
+ # Builds the enclosing TMail::Mail message: sets its headers (subject is +title+), pushes each of +parts+ onto it and returns the mail.
83
+ def self.create_mail(title,parts)
84
+ mail=TMail::Mail.new
85
+ mail.from='url2MHTML'
86
+ mail.subject = title
87
+ mail.date = Time.now
88
+ mail.mime_version = '1.0'
89
+ mail['X-MimeOLE']='url2MHTML'
90
+ # Body text of the outer message; the individual parts are pushed separately below.
91
+ mail.body = "This is a multi-part message in MIME format.\n"
92
+
93
+ parts.each{|part| mail.parts.push(part) }
94
+ # NOTE(review): the content type is assigned only after the parts are pushed; presumably the order matters to TMail, confirm before reordering.
95
+ mail.content_type='multipart/related; type="text/html"'
96
+ mail
97
+ end
98
+
99
+ def self.capture(uri)
100
+ page_content_list=get_contents(uri,true,[])
101
+ title=page_content_list.find(ContentInfo.new(nil,nil,nil,nil,'no title')) { |content| content.is_root == true }.title
102
+ mail_parts=page_content_list.map{|content| create_mail_part(content)}
103
+ mail=create_mail(title,mail_parts)
104
+ mail.encoded
105
+ end
106
+ end
@@ -0,0 +1,3 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + '/../lib/url2mhtml'
3
+
@@ -0,0 +1,16 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
+ require "test/unit"
4
+ class Url2mhtmlTest < Test::Unit::TestCase
5
+ def setup
6
+ end
7
+
8
+ def teardown
9
+ end
10
+ # Smoke test: calls capture on a live URL (needs network access) and only asserts that the result is truthy.
11
+ def test_capture
12
+ result = Url2mhtml.capture('http://www.google.com')
13
+ assert(result, "Failure.")
14
+ end
15
+
16
+ end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fromgit-url2mhtml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - forgithubid
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-09-17 00:00:00 -07:00
13
+ default_executable: url2mhtml
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: mechanize
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 0.7.8
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: tmail
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: 1.2.3.1
32
+ version:
33
+ description: generate to MHTML from URL.
34
+ email: forgithubid@gmail.com
35
+ executables:
36
+ - url2mhtml
37
+ extensions: []
38
+
39
+ extra_rdoc_files:
40
+ - README
41
+ - ChangeLog
42
+ files:
43
+ - README
44
+ - ChangeLog
45
+ - Rakefile
46
+ - bin/url2mhtml
47
+ - doc/classes
48
+ - doc/classes/Url2mhtml.html
49
+ - doc/classes/Url2mhtml.src
50
+ - doc/classes/Url2mhtml.src/M000001.html
51
+ - doc/classes/Url2mhtml.src/M000002.html
52
+ - doc/classes/Url2mhtml.src/M000003.html
53
+ - doc/classes/Url2mhtml.src/M000004.html
54
+ - doc/classes/Url2mhtml.src/M000005.html
55
+ - doc/classes/Url2mhtml.src/M000006.html
56
+ - doc/classes/Url2mhtml.src/M000007.html
57
+ - doc/classes/Url2mhtml.src/M000008.html
58
+ - doc/classes/Url2mhtmlTest.html
59
+ - doc/classes/Url2mhtmlTest.src
60
+ - doc/classes/Url2mhtmlTest.src/M000009.html
61
+ - doc/classes/Url2mhtmlTest.src/M000010.html
62
+ - doc/classes/Url2mhtmlTest.src/M000011.html
63
+ - doc/created.rid
64
+ - doc/files
65
+ - doc/files/lib
66
+ - doc/files/lib/url2mhtml_rb.html
67
+ - doc/files/test
68
+ - doc/files/test/test_helper_rb.html
69
+ - doc/files/test/url2mhtml_test_rb.html
70
+ - doc/fr_class_index.html
71
+ - doc/fr_file_index.html
72
+ - doc/fr_method_index.html
73
+ - doc/index.html
74
+ - doc/rdoc-style.css
75
+ - test/test_helper.rb
76
+ - test/url2mhtml_test.rb
77
+ - lib/url2mhtml.rb
78
+ has_rdoc: true
79
+ homepage: http://github.com/fromgit/url2mhtml/tree/master
80
+ post_install_message:
81
+ rdoc_options: []
82
+
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: "0"
90
+ version:
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: "0"
96
+ version:
97
+ requirements: []
98
+
99
+ rubyforge_project:
100
+ rubygems_version: 1.2.0
101
+ signing_key:
102
+ specification_version: 2
103
+ summary: generate to MHTML from URL.
104
+ test_files:
105
+ - test/url2mhtml_test.rb