fromgit-url2mhtml 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,37 @@
1
+
2
+ <?xml version="1.0" encoding="iso-8859-1"?>
3
+ <!DOCTYPE html
4
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
5
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
6
+
7
+ <!--
8
+
9
+ Methods
10
+
11
+ -->
12
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
13
+ <head>
14
+ <title>Methods</title>
15
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
16
+ <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
17
+ <base target="docwin" />
18
+ </head>
19
+ <body>
20
+ <div id="index">
21
+ <h1 class="section-bar">Methods</h1>
22
+ <div id="index-entries">
23
+ <a href="classes/Url2mhtml.html#M000004">append_relative_contents (Url2mhtml)</a><br />
24
+ <a href="classes/Url2mhtml.html#M000008">capture (Url2mhtml)</a><br />
25
+ <a href="classes/Url2mhtml.html#M000007">create_mail (Url2mhtml)</a><br />
26
+ <a href="classes/Url2mhtml.html#M000006">create_mail_part (Url2mhtml)</a><br />
27
+ <a href="classes/Url2mhtml.html#M000001">get_agent (Url2mhtml)</a><br />
28
+ <a href="classes/Url2mhtml.html#M000002">get_content (Url2mhtml)</a><br />
29
+ <a href="classes/Url2mhtml.html#M000003">get_contents (Url2mhtml)</a><br />
30
+ <a href="classes/Url2mhtml.html#M000005">resolve_relative_uri (Url2mhtml)</a><br />
31
+ <a href="classes/Url2mhtmlTest.html#M000009">setup (Url2mhtmlTest)</a><br />
32
+ <a href="classes/Url2mhtmlTest.html#M000010">teardown (Url2mhtmlTest)</a><br />
33
+ <a href="classes/Url2mhtmlTest.html#M000011">test_capture (Url2mhtmlTest)</a><br />
34
+ </div>
35
+ </div>
36
+ </body>
37
+ </html>
data/doc/index.html ADDED
@@ -0,0 +1,24 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
5
+
6
+ <!--
7
+
8
+ RDoc Documentation
9
+
10
+ -->
11
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
12
+ <head>
13
+ <title>RDoc Documentation</title>
14
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
15
+ </head>
16
<!-- Top row: the three index panes. Bottom row: the document viewer that the
     index pages target through the frame name "docwin". Every frame carries a
     title so assistive technology can announce what each pane contains. -->
<frameset rows="20%, 80%">
  <frameset cols="25%,35%,45%">
    <frame src="fr_file_index.html" title="Files" name="Files" />
    <frame src="fr_class_index.html" title="Classes" name="Classes" />
    <frame src="fr_method_index.html" title="Methods" name="Methods" />
  </frameset>
  <frame src="files/lib/url2mhtml_rb.html" title="Documentation" name="docwin" />
</frameset>
24
+ </html>
@@ -0,0 +1,208 @@
1
+
2
+ body {
3
+ font-family: Verdana,Arial,Helvetica,sans-serif;
4
+ font-size: 90%;
5
+ margin: 0;
6
+ margin-left: 40px;
7
+ padding: 0;
8
+ background: white;
9
+ }
10
+
11
+ h1,h2,h3,h4 { margin: 0; color: #efefef; background: transparent; }
12
+ h1 { font-size: 150%; }
13
+ h2,h3,h4 { margin-top: 1em; }
14
+
15
+ a { background: #eef; color: #039; text-decoration: none; }
16
+ a:hover { background: #039; color: #eef; }
17
+
18
+ /* Override the base stylesheet's Anchor inside a table cell */
19
+ td > a {
20
+ background: transparent;
21
+ color: #039;
22
+ text-decoration: none;
23
+ }
24
+
25
+ /* and inside a section title */
26
+ .section-title > a {
27
+ background: transparent;
28
+ color: #eee;
29
+ text-decoration: none;
30
+ }
31
+
32
+ /* === Structural elements =================================== */
33
+
34
+ div#index {
35
+ margin: 0;
36
+ margin-left: -40px;
37
+ padding: 0;
38
+ font-size: 90%;
39
+ }
40
+
41
+
42
+ div#index a {
43
+ margin-left: 0.7em;
44
+ }
45
+
46
+ div#index .section-bar {
47
+ margin-left: 0px;
48
+ padding-left: 0.7em;
49
+ background: #ccc;
50
+ font-size: small;
51
+ }
52
+
53
+
54
+ div#classHeader, div#fileHeader {
55
+ width: auto;
56
+ color: white;
57
+ padding: 0.5em 1.5em 0.5em 1.5em;
58
+ margin: 0;
59
+ margin-left: -40px;
60
+ border-bottom: 3px solid #006;
61
+ }
62
+
63
+ div#classHeader a, div#fileHeader a {
64
+ background: inherit;
65
+ color: white;
66
+ }
67
+
68
+ div#classHeader td, div#fileHeader td {
69
+ background: inherit;
70
+ color: white;
71
+ }
72
+
73
+
74
+ div#fileHeader {
75
+ background: #057;
76
+ }
77
+
78
+ div#classHeader {
79
+ background: #048;
80
+ }
81
+
82
+
83
+ .class-name-in-header {
84
+ font-size: 180%;
85
+ font-weight: bold;
86
+ }
87
+
88
+
89
+ div#bodyContent {
90
+ padding: 0 1.5em 0 1.5em;
91
+ }
92
+
93
+ div#description {
94
+ padding: 0.5em 1.5em;
95
+ background: #efefef;
96
+ border: 1px dotted #999;
97
+ }
98
+
99
/* Heading colour for the description box.
   NOTE: only the h1 selector is scoped to div#description. The h2-h6
   selectors in this list are unscoped, so this rule also sets the colour of
   every h2-h6 in the document and overrides the pale #efefef set by the
   h1,h2,h3,h4 rule earlier in this sheet. The selector list is kept as is
   because narrowing it would change how headings render elsewhere. */
div#description h1,h2,h3,h4,h5,h6 {
  color: #125;
  background: transparent;
}
103
+
104
+ div#validator-badges {
105
+ text-align: center;
106
+ }
107
+ div#validator-badges img { border: 0; }
108
+
109
+ div#copyright {
110
+ color: #333;
111
+ background: #efefef;
112
+ font: 0.75em sans-serif;
113
+ margin-top: 5em;
114
+ margin-bottom: 0;
115
+ padding: 0.5em 2em;
116
+ }
117
+
118
+
119
+ /* === Classes =================================== */
120
+
121
+ table.header-table {
122
+ color: white;
123
+ font-size: small;
124
+ }
125
+
126
+ .type-note {
127
+ font-size: small;
128
+ color: #DEDEDE;
129
+ }
130
+
131
+ .xxsection-bar {
132
+ background: #eee;
133
+ color: #333;
134
+ padding: 3px;
135
+ }
136
+
137
+ .section-bar {
138
+ color: #333;
139
+ border-bottom: 1px solid #999;
140
+ margin-left: -20px;
141
+ }
142
+
143
+
144
+ .section-title {
145
+ background: #79a;
146
+ color: #eee;
147
+ padding: 3px;
148
+ margin-top: 2em;
149
+ margin-left: -30px;
150
+ border: 1px solid #999;
151
+ }
152
+
153
+ .top-aligned-row { vertical-align: top }
154
+ .bottom-aligned-row { vertical-align: bottom }
155
+
156
+ /* --- Context section classes ----------------------- */
157
+
158
+ .context-row { }
159
+ .context-item-name { font-family: monospace; font-weight: bold; color: black; }
160
+ .context-item-value { font-size: small; color: #448; }
161
+ .context-item-desc { color: #333; padding-left: 2em; }
162
+
163
+ /* --- Method classes -------------------------- */
164
+ .method-detail {
165
+ background: #efefef;
166
+ padding: 0;
167
+ margin-top: 0.5em;
168
+ margin-bottom: 1em;
169
+ border: 1px dotted #ccc;
170
+ }
171
+ .method-heading {
172
+ color: black;
173
+ background: #ccc;
174
+ border-bottom: 1px solid #666;
175
+ padding: 0.2em 0.5em 0 0.5em;
176
+ }
177
+ .method-signature { color: black; background: inherit; }
178
+ .method-name { font-weight: bold; }
179
+ .method-args { font-style: italic; }
180
+ .method-description { padding: 0 0.5em 0 0.5em; }
181
+
182
+ /* --- Source code sections -------------------- */
183
+
184
+ a.source-toggle { font-size: 90%; }
185
+ div.method-source-code {
186
+ background: #262626;
187
+ color: #ffdead;
188
+ margin: 1em;
189
+ padding: 0.5em;
190
+ border: 1px dashed #999;
191
+ overflow: hidden;
192
+ }
193
+
194
+ div.method-source-code pre { color: #ffdead; overflow: hidden; }
195
+
196
+ /* --- Ruby keyword styles --------------------- */
197
+
198
+ .standalone-code { background: #221111; color: #ffdead; overflow: hidden; }
199
+
200
+ .ruby-constant { color: #7fffd4; background: transparent; }
201
+ .ruby-keyword { color: #00ffff; background: transparent; }
202
+ .ruby-ivar { color: #eedd82; background: transparent; }
203
+ .ruby-operator { color: #00ffee; background: transparent; }
204
+ .ruby-identifier { color: #ffdead; background: transparent; }
205
+ .ruby-node { color: #ffa07a; background: transparent; }
206
+ .ruby-comment { color: #b22222; font-weight: bold; background: transparent; }
207
+ .ruby-regexp { color: #ffa07a; background: transparent; }
208
+ .ruby-value { color: #7fffd4; background: transparent; }
data/lib/url2mhtml.rb ADDED
@@ -0,0 +1,106 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+ require 'tmail'
4
+
5
# Captures a web page together with the resources it references (images,
# background images, frames, iframes, stylesheets and scripts) and packs them
# into a single MHTML (multipart/related) message.
#
#   mhtml = Url2mhtml.capture('http://example.com/')
class Url2mhtml
  VERSION = '0.0.2'

  # One fetched resource.
  # uri::     address the resource was requested with
  # type::    value of the Content-Type response header
  # body::    raw response body
  # is_root:: true only for the page passed to Url2mhtml.capture
  # title::   page title for the root HTML page, DEFAULT_TITLE otherwise
  ContentInfo = Struct.new(:uri, :type, :body, :is_root, :title)

  # Title used for non-root or non-HTML resources and for untitled pages.
  DEFAULT_TITLE = 'no title'

  # Returns a new Mechanize agent that identifies itself as IE 6 on Windows.
  def self.get_agent
    agent = WWW::Mechanize.new
    agent.user_agent_alias = 'Windows IE 6'
    agent
  end

  # Fetches +uri+ and describes the response.
  #
  # Returns a two-element array: the ContentInfo for the response and the
  # object Mechanize returned for it (needed later to look for references).
  def self.get_content(uri, is_root)
    got_content = get_agent.get(uri)
    type = got_content.response['content-type']
    body = got_content.body

    # Only the root HTML page contributes a title. Fall back to the default
    # when the page has no <title> or Mechanize did not parse it as a page.
    title = DEFAULT_TITLE
    if is_root && /html/.match(type) && got_content.respond_to?(:title)
      title = got_content.title || DEFAULT_TITLE
    end

    content_info = ContentInfo.new(uri, type, body, is_root, title)
    return content_info, got_content
  end

  # Fetches +uri+, appends it to +content_info_list+ and, for HTML responses,
  # recursively appends every resource the page references.
  #
  # Returns +content_info_list+.
  def self.get_contents(uri, is_root, content_info_list)
    content_info, got_content = get_content(uri, is_root)
    content_info_list << content_info

    # respond_to? guards against an HTML-ish content type that Mechanize
    # nevertheless handed back as a plain file without a DOM.
    if /html/.match(content_info.type) && got_content.respond_to?(:search)
      append_relative_contents(got_content, content_info_list)
    end
    content_info_list
  end

  # Collects the resources referenced by +page+ and appends each one that is
  # not yet in +content_info_list+ (recursing into nested frames).
  #
  # References are gathered in this order: <img src>, background attributes
  # of <body>/<th>/<td>, frames, iframes, stylesheet links, <script src>.
  # Only http(s) and ftp targets are fetched; fragments are ignored.
  def self.append_relative_contents(page, content_info_list)
    base_uri = page.uri

    references = page.search('//img').map { |node| node['src'] }
    %w[//body //th //td].each do |xpath|
      references.concat(page.search(xpath).map { |node| node['background'] })
    end
    references.concat(page.frames.map { |frame| frame.uri })
    references.concat(page.iframes.map { |frame| frame.uri })
    references.concat(page.search('link[@rel="stylesheet"]').map { |node| node['href'] })
    references.concat(page.search('script').map { |node| node['src'] })

    target_urls = []
    references.each do |reference|
      # Elements without the attribute yield nil; blank values carry no target.
      relative = reference.to_s.strip
      next if relative.empty?

      begin
        absolute = resolve_relative_uri(base_uri, relative)
      rescue URI::Error
        next # a malformed reference must not abort the whole capture
      end

      next unless absolute =~ /^(http|ftp)/
      target_urls << absolute.gsub(/#.*/, '')
    end

    target_urls.uniq.each do |url|
      # Checked right before each fetch so that resources pulled in by an
      # earlier sibling (e.g. a shared stylesheet inside a frame) are not
      # downloaded and attached twice.
      next if content_info_list.any? { |content| content.uri.to_s == url }
      get_contents(url, false, content_info_list)
    end
  end

  # Resolves +target_uri+ against +base_uri+ and returns the absolute URI as
  # a String. Raises URI::Error subclasses for references URI.join rejects.
  def self.resolve_relative_uri(base_uri, target_uri)
    URI.join(base_uri.to_s, target_uri).to_s
  end

  # Builds the MIME part for one captured resource. HTML is stored as 8bit
  # text, everything else is base64 encoded.
  def self.create_mail_part(content)
    part = TMail::Mail.new

    part['content-location'] = content.uri
    part.content_type = content.type
    if /html/.match(content.type)
      part.transfer_encoding = '8bit'
      part.body = content.body
    else
      part.transfer_encoding = 'base64'
      # pack('m') already wraps its output at 60 columns, which is within the
      # 76-column limit for base64 bodies, so no further folding is needed.
      part.body = [content.body].pack('m').chomp
    end
    part
  end

  # Assembles the multipart/related message from +parts+, using +title+ as
  # the subject. Returns the TMail::Mail object.
  def self.create_mail(title, parts)
    mail = TMail::Mail.new
    mail.from = 'url2MHTML'
    mail.subject = title
    mail.date = Time.now
    mail.mime_version = '1.0'
    mail['X-MimeOLE'] = 'url2MHTML'

    mail.body = "This is a multi-part message in MIME format.\n"

    parts.each { |part| mail.parts.push(part) }

    mail.content_type = 'multipart/related; type="text/html"'
    mail
  end

  # Captures the page at +uri+ with all referenced resources and returns the
  # encoded MHTML document as a String.
  def self.capture(uri)
    page_content_list = get_contents(uri, true, [])

    root = page_content_list.find { |content| content.is_root == true }
    title = root ? root.title : DEFAULT_TITLE

    mail_parts = page_content_list.map { |content| create_mail_part(content) }
    create_mail(title, mail_parts).encoded
  end
end
@@ -0,0 +1,3 @@
1
+ require 'test/unit'
2
+ require File.dirname(__FILE__) + '/../lib/url2mhtml'
3
+
@@ -0,0 +1,16 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
+ require "test/unit"
4
# Smoke test for Url2mhtml.capture.
# NOTE: this performs a live HTTP request, so it needs network access.
class Url2mhtmlTest < Test::Unit::TestCase
  # No fixtures to prepare.
  def setup; end

  # Nothing to clean up.
  def teardown; end

  # Capturing a reachable page must produce a truthy (non-nil) result.
  def test_capture
    mhtml = Url2mhtml.capture('http://www.google.com')
    assert(mhtml, "Failure.")
  end
end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fromgit-url2mhtml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - forgithubid
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-09-17 00:00:00 -07:00
13
+ default_executable: url2mhtml
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: mechanize
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 0.7.8
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: tmail
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: 1.2.3.1
32
+ version:
33
+ description: generate to MHTML from URL.
34
+ email: forgithubid@gmail.com
35
+ executables:
36
+ - url2mhtml
37
+ extensions: []
38
+
39
+ extra_rdoc_files:
40
+ - README
41
+ - ChangeLog
42
+ files:
43
+ - README
44
+ - ChangeLog
45
+ - Rakefile
46
+ - bin/url2mhtml
47
+ - doc/classes
48
+ - doc/classes/Url2mhtml.html
49
+ - doc/classes/Url2mhtml.src
50
+ - doc/classes/Url2mhtml.src/M000001.html
51
+ - doc/classes/Url2mhtml.src/M000002.html
52
+ - doc/classes/Url2mhtml.src/M000003.html
53
+ - doc/classes/Url2mhtml.src/M000004.html
54
+ - doc/classes/Url2mhtml.src/M000005.html
55
+ - doc/classes/Url2mhtml.src/M000006.html
56
+ - doc/classes/Url2mhtml.src/M000007.html
57
+ - doc/classes/Url2mhtml.src/M000008.html
58
+ - doc/classes/Url2mhtmlTest.html
59
+ - doc/classes/Url2mhtmlTest.src
60
+ - doc/classes/Url2mhtmlTest.src/M000009.html
61
+ - doc/classes/Url2mhtmlTest.src/M000010.html
62
+ - doc/classes/Url2mhtmlTest.src/M000011.html
63
+ - doc/created.rid
64
+ - doc/files
65
+ - doc/files/lib
66
+ - doc/files/lib/url2mhtml_rb.html
67
+ - doc/files/test
68
+ - doc/files/test/test_helper_rb.html
69
+ - doc/files/test/url2mhtml_test_rb.html
70
+ - doc/fr_class_index.html
71
+ - doc/fr_file_index.html
72
+ - doc/fr_method_index.html
73
+ - doc/index.html
74
+ - doc/rdoc-style.css
75
+ - test/test_helper.rb
76
+ - test/url2mhtml_test.rb
77
+ - lib/url2mhtml.rb
78
+ has_rdoc: true
79
+ homepage: http://github.com/fromgit/url2mhtml/tree/master
80
+ post_install_message:
81
+ rdoc_options: []
82
+
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: "0"
90
+ version:
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: "0"
96
+ version:
97
+ requirements: []
98
+
99
+ rubyforge_project:
100
+ rubygems_version: 1.2.0
101
+ signing_key:
102
+ specification_version: 2
103
+ summary: generate to MHTML from URL.
104
+ test_files:
105
+ - test/url2mhtml_test.rb