podgraph 0.1.5 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +49 -0
- data/package.gemspec +33 -0
- data/podgraph +5 -0
- data/podgraph.rb +105 -0
- metadata +64 -88
- data/LICENSE +0 -22
- data/NEWS +0 -41
- data/README.rdoc +0 -99
- data/Rakefile +0 -44
- data/TODO +0 -7
- data/bin/podgraph +0 -78
- data/lib/podgraph/meta.rb +0 -9
- data/lib/podgraph/posterous.rb +0 -163
- data/lib/podgraph/trestle.rb +0 -216
- data/test/.document +0 -0
- data/test/blue.png +0 -0
- data/test/config.yaml +0 -3
- data/test/empty.html +0 -0
- data/test/garbage_01.html +0 -1
- data/test/garbage_02.html +0 -1
- data/test/garbage_03.html +0 -8
- data/test/garbage_04.html +0 -9
- data/test/helper.rb +0 -3
- data/test/helper_trestle.rb +0 -37
- data/test/mechanical-turk/1.html +0 -329
- data/test/mechanical-turk/2.html +0 -320
- data/test/mechanical-turk/3.html +0 -320
- data/test/mechanical-turk/Baby-Bunnie.jpg +0 -0
- data/test/mechanical-turk/config.yaml +0 -3
- data/test/mechanical-turk/sun.jpg +0 -0
- data/test/nosubject.html +0 -5
- data/test/rake_git.rb +0 -36
- data/test/related.html +0 -15
- data/test/simple.html +0 -9
- data/test/test_mime.rb +0 -132
- data/test/yellow.png +0 -0
data/lib/podgraph/posterous.rb
DELETED
@@ -1,163 +0,0 @@
|
|
1
|
-
require 'mail'
|
2
|
-
require 'rexml/document'
|
3
|
-
require 'yaml'
|
4
|
-
require 'optparse'
|
5
|
-
|
6
|
-
require_relative 'trestle'
|
7
|
-
include Podgraph
|
8
|
-
|
9
|
-
# :include: ../../README.rdoc
|
10
|
-
module Podgraph
|
11
|
-
# Reads XHTML file, analyses it, finds images, checks if they can be inlined,
|
12
|
-
# generates multipart/related or multipart/mixed MIME mail.
|
13
|
-
class Posterous
|
14
|
-
|
15
|
-
# some options for mail generator; change with care
|
16
|
-
attr_accessor :o
|
17
|
-
|
18
|
-
# a Trestle object
|
19
|
-
attr_accessor :trestle
|
20
|
-
|
21
|
-
# Analyses _filename_. It must be a XHTML file.
|
22
|
-
# _to_, _from_ are email addresses.
|
23
|
-
# _mode_ is one of the strings 'related' or 'mixed'.
|
24
|
-
def initialize(trestle, filename, to, from, mode)
  # Options for the mail generator. :subject, :body, :attachment and
  # :a_marks start out empty and are filled in by #make.
  @o = {
    user_agent: Podgraph::Meta::NAME + ?/ + Podgraph::Meta::VERSION,
    subject: '',
    body: [],
    attachment: [],
    a_marks: {},
    mode: mode,
    to: to,
    from: from
  }

  @trestle = trestle

  # STDIN is parsed in place and left open. Anything else is opened as a
  # file and closed again once #make returns or raises.
  if filename == STDIN
    make(STDIN)
  else
    File.open(filename) { |fp| make(fp) }
  end
end
|
46
|
-
|
47
|
-
def make(fp)
  # Parses the XHTML document readable from _fp_ and fills @o:
  #
  # * :subject    - whitespace-normalised text of /html/body/div/h1
  # * :body       - every element matched by /html/body/div/* except the
  #                 first one
  # * :attachment - src values of all <img> elements that are not URLs
  # * :a_marks    - in 'related' mode, original src => random placeholder
  #                 written into the element in its place
  #
  # Raises RuntimeError when the subject cannot be extracted, when an
  # <img> has a missing or blank src, or when no body element is left.
  xml = REXML::Document.new(fp)
  begin
    @o[:subject].replace(REXML::XPath.first(xml, "/html/body/div/h1").text.gsub(/\s+/, " "))
    raise if @o[:subject] =~ /^\s*$/
  rescue
    # Also reached when there is no <h1> or it has no text: the resulting
    # NoMethodError on nil is rescued here as well.
    raise 'cannot extract the subject from <h1>'
  end

  # Inspects one element. `return` inside this lambda only leaves the lambda.
  img_collect = ->(node, found) {
    return unless node.name == 'img'

    src = node.attributes['src']
    # A missing attribute yields nil, which must be rejected explicitly:
    # `nil =~ regexp` is nil, so the blank check alone lets it through.
    if src.nil? || src =~ /^\s*$/
      raise '<img> tag with missing or empty src attribute'
    end
    # we are ignoring URL's
    return if src =~ /\s*(https?|s?ftp):\/\//

    found << src
    return unless @o[:mode] == 'related'

    # Replace src with a random mark; the mark is swapped for the real
    # content-id once the attachments exist.
    random = Mail.random_tag()
    node.attributes['src'] = random
    @o[:a_marks][src] = random # save an act of the replacement
  }

  first_element = true
  xml.elements.each('/html/body/div/*') { |i|
    if first_element
      first_element = false # skip first <h1>
      next
    end

    @trestle.veputs(2, "node: #{i.name}")
    img_collect.call(i, @o[:attachment])
    i.each_recursive { |j|
      @trestle.veputs(2, "node recursive: #{j.name}")
      img_collect.call(j, @o[:attachment])
    }

    @o[:body] << i
  }

  raise "body is empty or filled with nonsence" if @o[:body].size == 0
end
private :make
|
98
|
-
|
99
|
-
# Returns ready for delivery Mail object.
|
100
|
-
def generate
  # Builds a Mail object from the options collected in @o and returns it.
  m = Mail.new
  m.from(@o[:from])
  m.to(@o[:to])
  m.transport_encoding = Mail::Encodings.get_encoding('8bit')
  m.subject(@o[:subject])
  m.headers({'User-Agent' => @o[:user_agent]})

  @trestle.veputs(2, "Body lines=#{@o[:body].size}, bytes=#{@o[:body].to_s.bytesize}")

  # Nothing to attach: the message is a single inline text/html entity.
  if @o[:attachment].empty?
    m.content_disposition('inline')
    m.content_type('text/html; charset="UTF-8"')
    m.body(@o[:body])
    return m
  end

  related = (@o[:mode] == 'related')

  m.content_type('Multipart/Related') if related
  m.html_part = Mail::Part.new {
    content_type('text/html; charset=UTF-8')
  }
  m.html_part.body = @o[:body]
  m.html_part.content_disposition('inline') if @o[:mode] == 'mixed'

  begin
    @o[:attachment].each { |path| m.add_file(path) }
  rescue
    raise("cannot attach: #{$!}")
  end

  if related
    fqdn = Socket.gethostname
    raise 'hostname is not set!' if fqdn == ''

    # Every part after the first becomes inline and gets a content-id,
    # remembered under the part's filename.
    cid = {}
    m.parts[1..-1].each { |part|
      part.content_disposition('inline')
      cid[part.filename] = part.content_id("<#{Mail.random_tag}@#{fqdn}.NO_mail>")
    }

    # replace marks with corresponding content-id
    # NOTE(review): marks are keyed by the original src value while cid is
    # keyed by part.filename -- confirm these agree for images that live
    # in a subdirectory.
    @o[:a_marks].each { |src, mark|
      raise("orphan key in cid: #{src}") unless cid.key?(src)

      @trestle.veputs(2, "mark #{src} = #{mark}; -> to #{cid[src]}")
      m.html_part.body.raw_source.sub!(mark, "cid:#{cid[src][1..-1]}")
    }
  end

  m
end
|
154
|
-
|
155
|
-
# Print Mail object to stdout.
|
156
|
-
# _e_ is an optional encoding.
|
157
|
-
def dump(e = '')
  # Renders the generated mail as text, transcodes it when an encoding
  # name was given, and writes it with puts.
  text = generate.to_s
  text = text.encode(e) unless e == ''
  puts text
end
|
160
|
-
|
161
|
-
end # Posterous
|
162
|
-
|
163
|
-
end # Podgraph
|
data/lib/podgraph/trestle.rb
DELETED
@@ -1,216 +0,0 @@
|
|
1
|
-
# :erb:
|
2
|
-
require 'yaml'
|
3
|
-
require 'shellwords.rb'
|
4
|
-
require 'optparse'
|
5
|
-
require 'pp'
|
6
|
-
require 'open4'
|
7
|
-
|
8
|
-
require_relative 'meta'
|
9
|
-
|
10
|
-
# :include: ../../README.rdoc
|
11
|
-
module Podgraph
|
12
|
-
|
13
|
-
class Trestle
|
14
|
-
|
15
|
-
# Execute _cmd_ and return a list [exit_status, stderr,
|
16
|
-
# stdout]. Very handy.
|
17
|
-
def self.cmd_run(cmd)
  # Runs _cmd_ through Open4 and reads its stdout, then its stderr, in full.
  # Returns [exit status, stderr text, stdout text].
  out = ''
  err = ''
  status = Open4::popen4(cmd) do |_pid, _stdin, stdout, stderr|
    out = stdout.read
    err = stderr.read
  end
  [status.exitstatus, err, out]
end
|
25
|
-
|
26
|
-
# Return a directory with program libraries.
|
27
|
-
def self.gem_libdir
  # Candidate library directories, tried in this order: next to the
  # running script, inside the installed gem, and relative to the
  # current directory. The first readable one is returned; if none is
  # readable a RuntimeError is raised.
  name = Podgraph::Meta::NAME
  candidates = [
    "#{File.dirname(File.expand_path($0))}/../lib/#{name}",
    "#{Gem.dir}/gems/#{name}-#{Podgraph::Meta::VERSION}/lib/#{name}",
    "lib/#{name}"
  ]

  found = candidates.find { |dir| File.readable?(dir) }
  fail "all paths are invalid: #{candidates}" unless found
  found
end
|
34
|
-
|
35
|
-
# Analogue to shell command +which+.
|
36
|
-
def self.in_path?(file)
  # Returns true when _file_ exists: checked as-is for an absolute name,
  # otherwise looked up in each directory listed in ENV['PATH'].
  #
  # An absolute name is never searched for in PATH. File.join would
  # collapse the leading slash ('/bin' + '/ls' => '/bin/ls'), so a
  # nonexistent absolute path could otherwise be reported as found.
  return File.exist?(file) if file =~ %r{\A/}

  # An unset PATH is treated as empty instead of raising on nil.
  ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).any? do |dir|
    File.exist?(File.join(dir, file))
  end
end
|
43
|
-
|
44
|
-
# Print an error message _t_ and exit if _ec_ > 0.
|
45
|
-
def self.errx(ec, t)
  # Writes "<program> error: <t>" to STDERR, then exits with status _ec_
  # when it is positive; otherwise returns normally.
  STDERR.puts "#{File.basename($0)} error: #{t}"
  exit ec if ec > 0
end
|
49
|
-
|
50
|
-
# Print a warning.
|
51
|
-
def self.warnx(t)
  # Writes "<program> warning: <t>" to STDERR.
  STDERR.puts "#{File.basename($0)} warning: #{t}"
end
|
54
|
-
|
55
|
-
# #veputs uses this to decide to put a newline or not to put.
|
56
|
-
NNL_MARK = '__NNL__'
|
57
|
-
|
58
|
-
# Use this in your CL options to check if modifying some variable is
|
59
|
-
# not an idempotent act.
|
60
|
-
attr_reader :cl_opt_protect
|
61
|
-
|
62
|
-
# [conf] Typically must be a reference to some global variable.
|
63
|
-
def initialize(conf)
  # Stores _conf_ (the same hash object, not a copy) and seeds it with the
  # defaults used by the configuration and command-line parsing methods.
  name = Meta::NAME
  script_dir = File.dirname(File.expand_path($0))

  @conf = conf
  @conf[:verbose] = 0
  @conf[:banner] = "Usage: #{File.basename($0)} [options]"
  @conf[:config] = name + '.yaml'
  # Directories searched, in order, for the configuration file.
  @conf[:config_dirs] = [
    ENV['HOME'] + '/.' + name,
    File.absolute_path("#{script_dir}/../etc"),
    '/usr/etc',
    '/usr/local/etc',
    '/etc',
    "#{Gem.dir}/gems/#{name}-#{Meta::VERSION}/etc"
  ]
  # Environment variables whose value is parsed as extra options.
  @conf[:config_env] = [name.upcase + '_CONF']

  @cl_parsing_times = 0 # not used
  @cl_opt_protect = false
end
|
78
|
-
|
79
|
-
# [level] A verbose level.
|
80
|
-
# [t] A string to print.
|
81
|
-
#
|
82
|
-
# Don't print _t_ with a newline if it contains NNL_MARK at the end.
|
83
|
-
def veputs(level, t)
  # Prints _t_ to standard output when the configured verbosity is at
  # least _level_; otherwise does nothing.
  #
  # If _t_ ends a line with NNL_MARK, the marker is stripped and the text
  # is written with print (no newline added); otherwise puts is used.
  # _t_ itself is never modified.
  return if @conf[:verbose] < level

  # Strip the marker that was actually matched at the end of the line.
  # Substituting the bare marker text would remove its first occurrence
  # anywhere in the string and leave the trailing one in place.
  trailing_mark = /#{Regexp.escape(NNL_MARK)}$/
  if t =~ trailing_mark
    print(t.sub(trailing_mark, ''))
  else
    puts(t)
  end
  STDOUT.flush
end
|
96
|
-
|
97
|
-
# Run all configuration parsing in a batch.
|
98
|
-
#
|
99
|
-
# [rvars] A list of variable names which must be in the
|
100
|
-
# configuration file.
|
101
|
-
#
|
102
|
-
# If no block is given, only standard CL options will be analysed.
|
103
|
-
def config_parse(rvars, &block)
  # Hands one list of options either to the caller's block or, when no
  # block was given, to cl_parse with only the standard options.
  feed = ->(src) { block ? block.call(src) : cl_parse(src, nil, true) }

  # 1. options taken from the environment variables in @conf[:config_env]
  @conf[:config_env].each do |var|
    feed.call(ENV[var].shellsplit) if ENV.key?(var)
  end

  # 2. a pass over a copy of ARGV (in case of '--config'), made while
  #    cl_opt_protect is set; ARGV itself is left untouched here
  @cl_opt_protect = true
  feed.call(ARGV.dup)
  @cl_opt_protect = false

  # 3. load the configuration file, then do the final pass over ARGV
  begin
    loaded = config_flat_load(rvars)
  rescue
    Trestle.errx(1, "cannot load config: #{$!}")
  end
  veputs(1, "Loaded config: #{loaded}")
  feed.call(ARGV)
end
|
135
|
-
|
136
|
-
# Load a config file immediately if it contains '/' in its name,
|
137
|
-
# otherwise search through several dirs for it.
|
138
|
-
#
|
139
|
-
# [rvars] a list of required variables in the config
|
140
|
-
#
|
141
|
-
# Return a loaded filename or nil on error.
|
142
|
-
def config_flat_load(rvars)
  # Loads the configuration file named by @conf[:config] and merges it
  # into @conf.
  #
  # A name containing '/' is loaded directly; any other name is looked up
  # in each of @conf[:config_dirs], and the first readable file wins.
  # Returns the name of the loaded file, or nil when no readable file was
  # found. Raises RuntimeError when the file is not a mapping or a
  # variable listed in _rvars_ is missing or nil; aborts the program when
  # the file cannot be parsed.

  # Loads one file. Returns nil if it is not readable, otherwise merges it
  # and returns @conf[:config]. `return` here only leaves the lambda.
  load_one = ->(path) {
    return nil unless File.readable?(path)

    begin
      # NOTE(review): required variables are looked up as symbol keys, and
      # whether YAML.load_file accepts symbols depends on the installed
      # Psych defaults -- confirm before changing the loader.
      myconf = YAML.load_file(path)
    rescue
      abort("cannot parse #{path}: #{$!}")
    end
    # An empty file or a bare scalar does not load as a Hash.
    fail "#{path} does not contain a mapping" unless myconf.is_a?(Hash)

    rvars.each { |i|
      fail "missing or nil '#{i}' in #{path}" if ! myconf.key?(i.to_sym) || ! myconf[i.to_sym]
    }
    @conf.merge!(myconf)
    @conf[:config]
  }

  # The configuration hash lives in @conf; reading any other instance
  # variable here would hit nil as soon as the name contains '/'.
  return load_one.call(@conf[:config]) if @conf[:config].index('/')

  @conf[:config_dirs].each { |dir|
    return dir + '/' + @conf[:config] if load_one.call(dir + '/' + @conf[:config])
  }

  nil
end
|
169
|
-
|
170
|
-
|
171
|
-
# Parses CL-like options.
|
172
|
-
#
|
173
|
-
# [src] An array of options (usually +ARGV+).
|
174
|
-
#
|
175
|
-
# If _o_ is non nil function parses _src_ immediately, otherwise it
|
176
|
-
# only creates +OptionParser+ object and return it (if _simple_ is
|
177
|
-
# false).
|
178
|
-
def cl_parse(src, o = nil, simple = false)
  # With no parser passed in, build one carrying the standard options.
  # It is returned unparsed unless _simple_ is set; a parser passed in by
  # the caller is used for parsing right away.
  unless o
    o = OptionParser.new
    o.banner = @conf[:banner]

    o.on('-v', 'Be more verbose.') do |_|
      @conf[:verbose] += 1 unless cl_opt_protect
    end

    o.on('-V', 'Show version & exit.') do |_|
      puts Meta::VERSION
      exit 0
    end

    o.on('--config NAME', "Set a config name (default is #{@conf[:config]})") do |name|
      @conf[:config] = name
    end

    o.on('--config-dirs', 'Show possible config locations') do
      # Readable candidates are flagged with a leading '* '.
      @conf[:config_dirs].each do |dir|
        candidate = dir + '/' + @conf[:config]
        flag = File.readable?(candidate) ? '* ' : ' '
        puts flag + candidate
      end
      exit 0
    end

    return o unless simple
  end

  # parse! consumes the recognised options from _src_ in place.
  begin
    o.parse!(src)
    @cl_parsing_times += 1
  rescue
    Trestle.errx(1, $!.to_s)
  end
end
|
212
|
-
|
213
|
-
end # trestle
|
214
|
-
end
|
215
|
-
|
216
|
-
# Don't remove this: falsework/0.2.2/naive/2010-12-26T04:50:00+02:00
|
data/test/.document
DELETED
File without changes
|
data/test/blue.png
DELETED
Binary file
|
data/test/config.yaml
DELETED
data/test/empty.html
DELETED
File without changes
|
data/test/garbage_01.html
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
fukuyama
|
data/test/garbage_02.html
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
<html><lolipop>1</lolipop>
|
data/test/garbage_03.html
DELETED
data/test/garbage_04.html
DELETED
data/test/helper.rb
DELETED
data/test/helper_trestle.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
# :erb:
|
2
|
-
# Various stuff for minitest. Include this file into your 'helper.rb'.
|
3
|
-
|
4
|
-
require 'fileutils'
|
5
|
-
include FileUtils
|
6
|
-
|
7
|
-
require_relative '../lib/podgraph/trestle'
|
8
|
-
include Podgraph
|
9
|
-
|
10
|
-
# don't run tests automatically if they were invoked as 'gem check -t ...'
|
11
|
-
if $0 =~ /gem/
|
12
|
-
require 'minitest/unit'
|
13
|
-
else
|
14
|
-
require 'minitest/autorun'
|
15
|
-
end
|
16
|
-
|
17
|
-
# Return the right directory for (probably executable) _c_.
|
18
|
-
def cmd(c)
  # Changes into the test directory when not already there, then returns
  # the path of _c_ relative to it ('../bin/' + c).
  here = File.basename(Dir.pwd)

  if here == Meta::NAME.downcase
    # test probably is executed from the Rakefile
    Dir.chdir('test')
  elsif here != 'test'
    # tests were invoked by 'gem check -t podgraph'
    begin
      Dir.chdir(Trestle.gem_libdir + '/../../test')
    rescue
      raise "running tests from '#{Dir.pwd}' isn't supported: #{$!}"
    end
  end
  # when already in 'test' there is nothing special to do

  '../bin/' + c
end
|
36
|
-
|
37
|
-
# Don't remove this: falsework/0.2.2/naive/2010-12-26T04:50:00+02:00
|