podgraph 0.1.5 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,163 +0,0 @@
1
- require 'mail'
2
- require 'rexml/document'
3
- require 'yaml'
4
- require 'optparse'
5
-
6
- require_relative 'trestle'
7
- include Podgraph
8
-
9
- # :include: ../../README.rdoc
10
- module Podgraph
11
- # Reads XHTML file, analyses it, finds images, checks if they can be inlined,
12
- # generates multipart/related or multipart/mixed MIME mail.
13
- class Posterous
14
-
15
- # some options for mail generator; change with care
16
- attr_accessor :o
17
-
18
- # a Trestle object
19
- attr_accessor :trestle
20
-
21
- # Analyses _filename_. It must be an XHTML file.
22
- # _to_, _from_ are email addresses.
23
- # _mode_ is one of the strings 'related' or 'mixed'.
24
- def initialize(trestle, filename, to, from, mode)
25
- @o = Hash.new()
26
- @o[:user_agent] = Podgraph::Meta::NAME + ?/ + Podgraph::Meta::VERSION
27
- @o[:subject] = ''
28
- @o[:body] = []
29
- @o[:attachment] = []
30
- @o[:a_marks] = {}
31
- @o[:mode] = mode
32
- @o[:to] = to
33
- @o[:from] = from
34
-
35
- @trestle = trestle
36
-
37
- fp = (filename == STDIN ? STDIN : File.new(filename))
38
- begin
39
- make(fp)
40
- rescue
41
- raise $!
42
- ensure
43
- fp.close unless fp == STDIN
44
- end
45
- end
46
-
47
- def make(fp)
48
- xml = REXML::Document.new(fp)
49
- begin
50
- @o[:subject].replace(REXML::XPath.first(xml, "/html/body/div/h1").text.gsub(/\s+/, " "))
51
- raise if @o[:subject] =~ /^\s*$/
52
- rescue
53
- raise 'cannot extract the subject from <h1>'
54
- end
55
-
56
- img_collect = ->(i, a) {
57
- if i.name == 'img'
58
- if (src = i.attributes['src']) =~ /^\s*$/
59
- raise '<img> tag with missing or empty src attribute'
60
- elsif src =~ /\s*(https?|s?ftp):\/\//
61
- # we are ignoring URLs
62
- return
63
- else
64
- a << src
65
- if @o[:mode] == 'related'
66
- # replace the src attribute with random chars--later
67
- # we'll replace such marks with the correct content-id
68
- random = Mail.random_tag()
69
- i.attributes['src'] = random
70
- @o[:a_marks][src] = random # save an act of the replacement
71
-
72
- @o.rehash() # is this really necessary?
73
- end
74
- end
75
- end
76
- }
77
-
78
- f = 1
79
- xml.elements.each('/html/body/div/*') { |i|
80
- if f == 1
81
- f = 0 # skip first <h1>
82
- next
83
- end
84
-
85
- @trestle.veputs(2, "node: #{i.name}")
86
- img_collect.call(i, @o[:attachment])
87
- i.each_recursive { |j|
88
- @trestle.veputs(2, "node recursive: #{j.name}")
89
- img_collect.call(j, @o[:attachment])
90
- }
91
-
92
- @o[:body] << i
93
- }
94
-
95
- raise "body is empty or filled with nonsence" if @o[:body].size == 0
96
- end
97
- private :make
98
-
99
- # Returns a Mail object that is ready for delivery.
100
- def generate()
101
- m = Mail.new()
102
- m.from(@o[:from])
103
- m.to(@o[:to])
104
- m.transport_encoding = Mail::Encodings.get_encoding('8bit')
105
- # m.content_transfer_encoding('8bit')
106
- m.subject(@o[:subject])
107
- m.headers({'User-Agent' => @o[:user_agent]})
108
-
109
- @trestle.veputs(2, "Body lines=#{@o[:body].size}, bytes=#{@o[:body].to_s.bytesize}")
110
- if @o[:attachment].size == 0
111
- m.content_disposition('inline')
112
- m.content_type('text/html; charset="UTF-8"')
113
- m.body(@o[:body])
114
- else
115
- if @o[:mode] == 'related'
116
- m.content_type('Multipart/Related')
117
- end
118
- m.html_part = Mail::Part.new {
119
- content_type('text/html; charset=UTF-8')
120
- }
121
- m.html_part.body = @o[:body]
122
- m.html_part.content_disposition('inline') if @o[:mode] == 'mixed'
123
-
124
- begin
125
- @o[:attachment].each { |i| m.add_file(i) }
126
- rescue
127
- raise("cannot attach: #{$!}")
128
- end
129
-
130
- if @o[:mode] == 'related'
131
- if (fqdn = Socket.gethostname() ) == ''
132
- raise 'hostname is not set!'
133
- end
134
- cid = {}
135
- m.parts[1..-1].each { |i|
136
- i.content_disposition('inline')
137
- cid[i.filename] = i.content_id("<#{Mail.random_tag}@#{fqdn}.NO_mail>")
138
- }
139
-
140
- @o[:a_marks].each { |k, v|
141
- if cid.key?(k)
142
- @trestle.veputs(2, "mark #{k} = #{v}; -> to #{cid[k]}")
143
- # replace marks with corresponding content-id
144
- m.html_part.body.raw_source.sub!(v, "cid:#{cid[k][1..-1]}")
145
- else
146
- raise("orphan key in cid: #{k}")
147
- end
148
- }
149
- end
150
- end # a.size
151
-
152
- return m
153
- end
154
-
155
- # Print Mail object to stdout.
156
- # _e_ is an optional encoding.
157
- def dump(e = '')
158
- puts (e == '' ? generate().to_s : generate().to_s.encode(e))
159
- end
160
-
161
- end # Posterous
162
-
163
- end # Podgraph
@@ -1,216 +0,0 @@
1
- # :erb:
2
- require 'yaml'
3
- require 'shellwords.rb'
4
- require 'optparse'
5
- require 'pp'
6
- require 'open4'
7
-
8
- require_relative 'meta'
9
-
10
- # :include: ../../README.rdoc
11
- module Podgraph
12
-
13
- class Trestle
14
-
15
- # Execute _cmd_ and return a list [exit_status, stderr,
16
- # stdout]. Very handy.
17
- def self.cmd_run(cmd)
18
- so = sr = ''
19
- status = Open4::popen4(cmd) { |pid, stdin, stdout, stderr|
20
- so = stdout.read
21
- sr = stderr.read
22
- }
23
- [status.exitstatus, sr, so]
24
- end
25
-
26
- # Return a directory with program libraries.
27
- def self.gem_libdir
28
- t = ["#{File.dirname(File.expand_path($0))}/../lib/#{Podgraph::Meta::NAME}",
29
- "#{Gem.dir}/gems/#{Podgraph::Meta::NAME}-#{Podgraph::Meta::VERSION}/lib/#{Podgraph::Meta::NAME}",
30
- "lib/#{Podgraph::Meta::NAME}"]
31
- t.each {|i| return i if File.readable?(i) }
32
- fail "all paths are invalid: #{t}"
33
- end
34
-
35
- # Analogue to shell command +which+.
36
- def self.in_path?(file)
37
- return true if file =~ %r%\A/% and File.exist? file
38
-
39
- ENV['PATH'].split(File::PATH_SEPARATOR).any? do |path|
40
- File.exist? File.join(path, file)
41
- end
42
- end
43
-
44
- # Print an error message _t_ and exit if _ec_ > 0.
45
- def self.errx(ec, t)
46
- STDERR.puts File.basename($0) + ' error: ' + t.to_s
47
- exit ec if ec > 0
48
- end
49
-
50
- # Print a warning.
51
- def self.warnx(t)
52
- STDERR.puts File.basename($0) + ' warning: ' + t.to_s
53
- end
54
-
55
- # #veputs uses this to decide whether or not to print a trailing newline.
56
- NNL_MARK = '__NNL__'
57
-
58
- # Use this in your CL options to check if modifying some variable is
59
- # not an idempotent act.
60
- attr_reader :cl_opt_protect
61
-
62
- # [conf] Typically must be a reference to some global variable.
63
- def initialize(conf)
64
- @conf = conf
65
- @conf[:verbose] = 0
66
- @conf[:banner] = "Usage: #{File.basename($0)} [options]"
67
- @conf[:config] = Meta::NAME + '.yaml'
68
- @conf[:config_dirs] = [ENV['HOME']+'/.'+Meta::NAME,
69
- File.absolute_path("#{File.dirname(File.expand_path($0))}/../etc"),
70
- '/usr/etc', '/usr/local/etc', '/etc',
71
- "#{Gem.dir}/gems/#{Meta::NAME}-#{Meta::VERSION}/etc"
72
- ]
73
- @conf[:config_env] = [Meta::NAME.upcase + '_CONF']
74
-
75
- @cl_parsing_times = 0 # not used
76
- @cl_opt_protect = false
77
- end
78
-
79
- # [level] A verbose level.
80
- # [t] A string to print.
81
- #
82
- # Don't print _t_ with a newline if it contains NNL_MARK at the end.
83
- def veputs(level, t)
84
- t = t.dup
85
- nnl = nil
86
- if t.match(/#{NNL_MARK}$/)
87
- t.sub!(/#{$&}/, '')
88
- nnl = 1
89
- end
90
-
91
- if @conf[:verbose] >= level
92
- nnl ? print(t) : puts(t)
93
- STDOUT.flush
94
- end
95
- end
96
-
97
- # Run all configuration parsing in a batch.
98
- #
99
- # [rvars] A list of variable names which must be in the
100
- # configuration file.
101
- #
102
- # If no block is given, only standard CL options will be analysed.
103
- def config_parse(rvars, &block)
104
- cb = ->(b, src) {
105
- if b
106
- block.call src
107
- else
108
- # very basic default options
109
- cl_parse(src, nil, true)
110
- end
111
- }
112
-
113
- # 1. parse env
114
- @conf[:config_env].each {|i|
115
- # puts '0 run:'
116
- cb.call(block_given?, ENV[i].shellsplit) if ENV.key?(i)
117
- }
118
-
119
- # 2. parse CL in case of '--config' option
120
- # puts "\n1 run"
121
- @cl_opt_protect = true
122
- cb.call(block_given?, ARGV.dup)
123
- @cl_opt_protect = false
124
-
125
- # 3. load the configuration file & do the final CL parsing
126
- begin
127
- # puts "\n2 run"
128
- r = config_flat_load(rvars)
129
- rescue
130
- Trestle.errx(1, "cannot load config: #{$!}")
131
- end
132
- veputs(1, "Loaded config: #{r}")
133
- cb.call(block_given?, ARGV)
134
- end
135
-
136
- # Load a config file immediately if it contains '/' in its name,
137
- # otherwise search through several dirs for it.
138
- #
139
- # [rvars] a list of required variables in the config
140
- #
141
- # Return a loaded filename or nil on error.
142
- def config_flat_load(rvars)
143
- p = ->(f) {
144
- if File.readable?(f)
145
- begin
146
- myconf = YAML.load_file(f)
147
- rescue
148
- abort("cannot parse #{f}: #{$!}")
149
- end
150
- rvars.each { |i|
151
- fail "missing or nil '#{i}' in #{f}" if ! myconf.key?(i.to_sym) || ! myconf[i.to_sym]
152
- }
153
- @conf.merge!(myconf)
154
- return @conf[:config]
155
- end
156
- return nil
157
- }
158
-
159
- if @conf[:config].index('/')
160
- return p.call(@config[:config])
161
- else
162
- @conf[:config_dirs].each {|dir|
163
- return dir+'/'+@conf[:config] if p.call(dir + '/' + @conf[:config])
164
- }
165
- end
166
-
167
- return nil
168
- end
169
-
170
-
171
- # Parses CL-like options.
172
- #
173
- # [src] An array of options (usually +ARGV+).
174
- #
175
- # If _o_ is non-nil, the function parses _src_ immediately; otherwise it
176
- # only creates an +OptionParser+ object and returns it (if _simple_ is
177
- # false).
178
- def cl_parse(src, o = nil, simple = false)
179
- if ! o then
180
- # puts "NEW o (#{cl_opt_protect})" + src.to_s
181
- o = OptionParser.new
182
- o.banner = @conf[:banner]
183
- o.on('-v', 'Be more verbose.') { |i|
184
- # puts "cl_parsing_times "+cl_parsing_times.to_s
185
- @conf[:verbose] += 1 unless cl_opt_protect
186
- }
187
- o.on('-V', 'Show version & exit.') { |i|
188
- puts Meta::VERSION
189
- exit 0
190
- }
191
- o.on('--config NAME', "Set a config name (default is #{@conf[:config]})") {|i|
192
- @conf[:config] = i
193
- }
194
- o.on('--config-dirs', 'Show possible config locations') {
195
- @conf[:config_dirs].each { |j|
196
- f = j + '/' + @conf[:config]
197
- puts (File.readable?(f) ? '* ' : ' ') + f
198
- }
199
- exit 0
200
- }
201
-
202
- return o if ! simple
203
- end
204
-
205
- begin
206
- o.parse!(src)
207
- @cl_parsing_times += 1
208
- rescue
209
- Trestle.errx(1, $!.to_s)
210
- end
211
- end
212
-
213
- end # trestle
214
- end
215
-
216
- # Don't remove this: falsework/0.2.2/naive/2010-12-26T04:50:00+02:00
data/test/.document DELETED
File without changes
data/test/blue.png DELETED
Binary file
data/test/config.yaml DELETED
@@ -1,3 +0,0 @@
1
- :to: alex@goliard
2
- # :to: post@podgraph-test.posterous.com
3
- :from: alexander.gromnitsky@gmail.com
data/test/empty.html DELETED
File without changes
data/test/garbage_01.html DELETED
@@ -1 +0,0 @@
1
- fukuyama
data/test/garbage_02.html DELETED
@@ -1 +0,0 @@
1
- <html><lolipop>1</lolipop>
data/test/garbage_03.html DELETED
@@ -1,8 +0,0 @@
1
- <html>
2
- <body>
3
- <div>
4
- <h1>Really, dude</h1>
5
- This doesn't work.
6
- </div>
7
- </body>
8
- </html>
data/test/garbage_04.html DELETED
@@ -1,9 +0,0 @@
1
- <html>
2
- <body>
3
- <div>
4
- <h1>zzz</h1>
5
- <p>Missing <img src='yobo'/> inline image.</p>
6
- (hidden text)
7
- </div>
8
- </body>
9
- </html>
data/test/helper.rb DELETED
@@ -1,3 +0,0 @@
1
- require_relative 'helper_trestle'
2
-
3
- require 'digest/md5'
@@ -1,37 +0,0 @@
1
- # :erb:
2
- # Various stuff for minitest. Include this file in your 'helper.rb'.
3
-
4
- require 'fileutils'
5
- include FileUtils
6
-
7
- require_relative '../lib/podgraph/trestle'
8
- include Podgraph
9
-
10
- # don't run tests automatically if they were invoked as 'gem check -t ...'
11
- if $0 =~ /gem/
12
- require 'minitest/unit'
13
- else
14
- require 'minitest/autorun'
15
- end
16
-
17
- # Return the right directory for (probably executable) _c_.
18
- def cmd(c)
19
- case File.basename(Dir.pwd)
20
- when Meta::NAME.downcase
21
- # test probably is executed from the Rakefile
22
- Dir.chdir('test')
23
- when 'test'
24
- # we are in the test directory, there is nothing special to do
25
- else
26
- # tests were invoked by 'gem check -t podgraph'
27
- begin
28
- Dir.chdir(Trestle.gem_libdir + '/../../test')
29
- rescue
30
- raise "running tests from '#{Dir.pwd}' isn't supported: #{$!}"
31
- end
32
- end
33
-
34
- '../bin/' + c
35
- end
36
-
37
- # Don't remove this: falsework/0.2.2/naive/2010-12-26T04:50:00+02:00