gorgeous 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +22 -0
- data/bin/gorgeous +126 -0
- data/lib/gorgeous.rb +241 -0
- metadata +69 -0
data/README.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Usage:
|
2
|
+
|
3
|
+
gorgeous [-i] [-F <in-format>] [-T <out-format>] [-q <query>] [-o <destination>] FILE ...
|
4
|
+
|
5
|
+
This utility converts data between different formats.
|
6
|
+
Formats are one of: json, xml, yaml, ruby, email, url
|
7
|
+
|
8
|
+
Input can be read from STDIN as well as files given on the command-line.
|
9
|
+
|
10
|
+
Options:
|
11
|
+
|
12
|
+
-i Convert file in-place
|
13
|
+
-F Input format
|
14
|
+
-T Output format
|
15
|
+
-q Query selector in format similar to XPath (see below)
|
16
|
+
-o Write output to file instead of STDOUT
|
17
|
+
|
18
|
+
Query format:
|
19
|
+
|
20
|
+
"/items/comments[1]/text" -- 2nd comment body of each item
|
21
|
+
"/items[-1]/user/full_name" -- name of user for last item
|
22
|
+
"//user/username" -- usernames of all users anywhere in the document
|
data/bin/gorgeous
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
help = <<-MSG
|
4
|
+
Usage: gorgeous [-i] [-F <in-format>] [-T <out-format>] [-q <query>] [-o <destination>] FILE ...
|
5
|
+
|
6
|
+
This utility converts data between different formats.
|
7
|
+
Formats are one of: json, xml, yaml, ruby, email, url
|
8
|
+
|
9
|
+
Input can be read from STDIN as well as files given on the command-line.
|
10
|
+
|
11
|
+
Options:
|
12
|
+
-i Convert file in-place
|
13
|
+
-F Input format
|
14
|
+
-T Output format
|
15
|
+
-q Query selector in format similar to XPath (see below)
|
16
|
+
-o Write output to file instead of STDOUT
|
17
|
+
|
18
|
+
Query format:
|
19
|
+
"/items/comments[1]/text" -- 2nd comment body of each item
|
20
|
+
"/items[-1]/user/full_name" -- name of user for last item
|
21
|
+
"//user/username" -- usernames of all users anywhere in the document
|
22
|
+
|
23
|
+
MSG
|
24
|
+
|
25
|
+
# Command-line driver: parses flags out of ARGV, reads the input through
# Gorgeous, converts it to the requested output format and writes the result
# to STDOUT or to a destination file. Relies on the `help` text defined above.

# Print the full help text and stop when help is explicitly requested.
if ARGV.include?('-h') || ARGV.include?('--help')
  puts help
  exit
end

require 'gorgeous'

# The first line of the help text is the short usage message. It is also the
# single source of truth for which flags exist and which of them take a value.
help_usage = help.split("\n", 2).first

# Walk over every "-x" / "-x <value>" found in the usage line and pull the
# matching arguments out of ARGV, so that only file names remain afterwards.
options = help_usage.scan(/[^\w-](-[a-z])(?: <(.+?)>)?/i).inject({}) do |opts, (flag, value)|
  if value.nil?
    # boolean switch
    opts[flag] = true if ARGV.delete flag
  elsif idx = ARGV.index(flag)
    # flag followed by its value: record the value, then drop both entries
    opts[flag] = ARGV[idx + 1]
    2.times { ARGV.delete_at idx }
  end
  opts
end

in_place      = options['-i']
query_path    = options['-q']
input_format  = options['-F'] && options['-F'].to_sym
output_format = options['-T'] && options['-T'].to_sym
destination   = options['-o']
from_stdin    = !$stdin.tty?

if from_stdin
  if in_place
    warn "can't edit stdin in place"
    exit 1
  end
elsif in_place && destination
  warn "can't combine -i and -o flags"
  exit 1
elsif ARGV.empty?
  warn help_usage
  exit 1
end

# clobber the input file
destination = ARGV.first if in_place

input = Gorgeous.new(ARGF, :filename => ARGV.first, :format => input_format, :query => query_path)

unless input.format
  warn "unrecognized input format; specify explicit format with `-F <format>`"
  exit 1
end

# From here on work with the effective input format: the explicit -F value if
# one was given, otherwise whatever Gorgeous inferred. Testing only the -F
# value would skip the email-specific conversions for inferred formats.
input_format = input.format

# Without -T the output format follows the destination's extension, or simply
# mirrors the input format when writing to STDOUT.
output_format ||= if destination
  Gorgeous.filename_to_format(destination)
else
  input_format
end

if output_format == :ruby
  require 'pp'
  output = input.data.pretty_inspect
elsif output_format == :json
  require 'yajl/json_gem'
  output = JSON.pretty_generate input.data
elsif output_format == :yaml
  require 'yaml'
  output = YAML.dump input.data
elsif input_format == :email && output_format == :xml
  # email => xml means "pretty-print the HTML part of the message"
  require 'nokogiri'
  email = input.to_mail
  part = email.html_part
  unless part
    warn "email has no HTML part"
    exit 1
  end
  html = Gorgeous.convert_utf8(part.decode_body, part.charset)
  ugly = Nokogiri::HTML html
  output = Gorgeous.pretty_xml(ugly)
elsif input_format == :email && output_format == :txt && !input.filtered?
  # email => txt without a query means "print the plain-text part"
  email = input.to_mail
  part = email.text_part
  unless part
    warn "email has no text part"
    exit 1
  end
  output = Gorgeous.convert_utf8(part.decode_body.strip, part.charset)
elsif output_format == :xml
  output = Gorgeous.pretty_xml(input.to_xml)
elsif output_format == :url
  require 'rack/utils'
  output = Rack::Utils.build_nested_query(input.data)
elsif output_format == :txt
  data = input.data
  output = Array === data ? data.join("\n") : data.to_s
else
  warn "don't know how to convert #{input_format} to #{output_format}"
  exit 1
end

if destination
  begin
    File.open(destination, 'w') { |f| f << output }
  rescue SystemCallError
    # covers missing directories as well as permission and is-a-directory errors
    warn "error opening #{destination} for writing"
    exit 1
  end
else
  begin
    puts output
  rescue Errno::EPIPE
    # ignore "broken pipe" exception
  end
end
|
data/lib/gorgeous.rb
ADDED
@@ -0,0 +1,241 @@
|
|
1
|
+
# Reads structured input (XML, JSON, YAML, Ruby literals, email, URL query
# strings, raw HTTP responses) and exposes it as plain Ruby data, optionally
# narrowed down by an XPath-like query such as "/items[-1]/user/full_name".
class Gorgeous
  # One step of a query: "/" (direct child) or "//" (any depth), a key name,
  # and an optional "[index]" suffix.
  QUERY_STEP = %r{(//?)(\w+)(?:\[(-?\d+)\])?}

  # Maps a file name to a format symbol based on its extension.
  # The comparison is case-insensitive. Returns nil for unknown extensions.
  def self.filename_to_format(filename)
    case File.extname(filename).downcase
    when '.json' then :json
    when '.xml', '.html' then :xml
    when '.rb' then :ruby
    when '.yml', '.yaml' then :yaml
    when '.mail', '.email' then :email
    end
  end

  # Re-indents a Nokogiri document using the XSLT stylesheet stored after the
  # __END__ marker of this file. Returns the transformed document as a String.
  # Nokogiri must already be loaded by the caller.
  def self.pretty_xml(ugly)
    tidy = Nokogiri::XSLT(stylesheet_source)
    tidy.transform(ugly).to_s
  end

  # Returns the text following this file's __END__ marker, read once and cached.
  # The DATA constant cannot be used here: Ruby defines it only for the main
  # script, not for required files, and it can be read only once.
  def self.stylesheet_source
    @stylesheet_source ||= File.read(__FILE__).split(/^__END__\r?\n/, 2).last
  end
  private_class_method :stylesheet_source

  # Converts +string+ from +from_charset+ to UTF-8.
  # The string is returned untouched when the charset is nil or already UTF-8
  # (any spelling: "utf-8", "UTF8", ...).
  def self.convert_utf8(string, from_charset)
    return string if from_charset.nil? || from_charset.downcase.tr('-', '') == 'utf8'

    if string.respond_to?(:encode)
      # String#encode is available on every Ruby that has encodings built in
      string.encode('utf-8', from_charset)
    else
      # legacy Rubies without String#encode
      require 'iconv'
      Iconv.conv 'utf-8', from_charset, string
    end
  end

  # Extracts the commonly useful headers of a Mail message into an ordered
  # hash with symbol keys. Headers that are absent are left out of the result.
  def self.headers_from_mail(email)
    require 'active_support/ordered_hash'
    require 'active_support/core_ext/object/blank'

    # Decoded address(es) of an address header: a single string when there is
    # at most one address, an array otherwise, nil when the header is missing.
    address_field = lambda { |name|
      field = email.header[name]
      if field
        values = field.addrs.map { |a|
          Mail::Encodings.unquote_and_convert_to(a.format, 'utf-8')
        }
        values.size < 2 ? values.first : values
      end
    }
    # Raw header value as a string.
    header_value = lambda { |name|
      field = email.header[name]
      field && field.value.to_s
    }
    # Header value with encoded words decoded to UTF-8.
    decoded_value = lambda { |name|
      field = email.header[name]
      Mail::Encodings.unquote_and_convert_to(field.value, 'utf-8') if field
    }

    data = ActiveSupport::OrderedHash.new
    data[:subject] = decoded_value['subject']
    data[:from] = address_field['from']
    data[:to] = address_field['to']
    data[:cc] = address_field['cc']
    data[:bcc] = address_field['bcc']
    data[:reply_to] = address_field['reply-to']
    data[:return_path] = email.return_path

    data[:message_id] = email.message_id
    data[:in_reply_to] = email.in_reply_to
    data[:references] = email.references

    data[:date] = email.date
    data[:sender] = address_field['sender']
    data[:delivered_to] = header_value['delivered-to']
    data[:original_sender] = header_value['x-original-sender']
    # keep only the media type, dropping parameters such as the charset
    data[:content_type] = email.content_type.to_s.split(';', 2).first.presence
    data[:precedence] = header_value['precedence']

    data.reject! { |_name, value| value.nil? }
    data
  end

  # Parses a raw HTTP response (String or IO) into a hash with :headers,
  # :body and :status ([code, message]) entries. An IO argument is read
  # completely and closed.
  #
  # adapted from webmock
  def self.http_from_string(raw_response)
    require 'net/http'
    require 'stringio'

    if raw_response.is_a?(IO)
      string = raw_response.read
      raw_response.close
      raw_response = string
    end
    # Net::BufferedIO reads from an IO-like object, so wrap the string
    socket = ::Net::BufferedIO.new(StringIO.new(raw_response))
    response = ::Net::HTTPResponse.read_new(socket)
    transfer_encoding = response.delete('transfer-encoding') # chunks were already read
    response.reading_body(socket, true) {}

    options = {}
    options[:headers] = {}
    response.each_header { |name, value| options[:headers][name] = value }
    options[:headers]['transfer-encoding'] = transfer_encoding if transfer_encoding
    options[:body] = response.read_body
    options[:status] = [response.code.to_i, response.message]
    options
  end

  # input   - a String, or any object responding to #read
  # options - :format   explicit input format (Symbol)
  #           :filename used to infer the format from its extension
  #           :query    query string applied to the parsed data
  def initialize(input, options = {})
    @input = input
    @format = options[:format]
    @options = options
  end

  # True when a query was given, i.e. #data returns a filtered subset.
  def filtered?
    !!@options[:query]
  end

  # The raw input text. A readable input is consumed once and cached.
  def to_s
    @str ||= @input.respond_to?(:read) ? @input.read : @input
  end

  # The input format: the explicit :format option, else the format implied by
  # the :filename extension, else a guess based on the content. May be nil.
  def format
    @format ||= begin
      filename = @options[:filename]
      (filename && self.class.filename_to_format(filename)) || guess_format
    end
  end

  # Parses the input according to #format and applies the query, if any.
  # The result is typically a hash or an array.
  # Raises ArgumentError when the format is unknown.
  def data
    parsed =
      case format
      when :xml
        require 'active_support/core_ext/hash/conversions'
        Hash.from_xml(to_s)
      when :json
        require 'yajl/json_gem'
        JSON.parse to_s
      when :yaml
        require 'yaml'
        # NOTE(review): YAML.load may instantiate arbitrary objects on older
        # Rubies; only feed it trusted input.
        YAML.load to_s
      when :email
        self.class.headers_from_mail to_mail
      when :ruby
        # WARNING: executes the input as Ruby code; never use on untrusted data
        eval to_s # TODO: sandbox
      when :url
        require 'rack/utils'
        Rack::Utils.parse_nested_query(to_s.strip)
      when :http
        require 'net/http'
        self.class.http_from_string(to_s)[:headers]
      else
        raise ArgumentError, "don't know how to decode #{format}"
      end

    apply_query(parsed)
  end

  # The input parsed as a Nokogiri document.
  def to_xml
    require 'nokogiri'
    Nokogiri(to_s)
  end

  # The input parsed as a Mail message. Leading whitespace is dropped and a
  # trailing newline is ensured before parsing.
  def to_mail
    require 'mail'
    raw = to_s.lstrip
    raw << "\n" unless raw[-1, 1] == "\n"
    Mail.new raw
  end

  private

  # Sniffs the format from the first characters of the input.
  # Returns nil when nothing matches.
  def guess_format
    case to_s
    when /\A\s*[\[\{]/ then :json
    when /\A\s*</ then :xml
    when /\A---\s/ then :yaml
    when /\A\S+=\S+\Z/ then :url
    end
  end

  # Applies the query steps to +obj+ one after another, stopping early as soon
  # as a step yields nothing. Returns +obj+ unchanged when there is no query.
  def apply_query(obj)
    return obj unless filtered?

    query = @options[:query].dup
    # consume the query one step at a time
    while query.sub!(QUERY_STEP, '')
      step = Regexp.last_match
      obj = filter_resultset(obj, step[2], step[3], step[1].length == 2)
      break if obj.nil?
    end
    obj
  end

  # Resolves a single query step against +obj+.
  #
  # obj  - Array or Hash to search; anything else yields nil
  # key  - name to look up (string and symbol keys both match)
  # idx  - optional index (as a string) applied to the value found
  # deep - when true, search nested hashes at any depth
  #
  # Arrays are searched element by element and the hits are flattened.
  # Returns nil when nothing was found.
  def filter_resultset(obj, key, idx = nil, deep = false)
    case obj
    when Array
      hits = obj.map { |o| filter_resultset(o, key, idx, deep) }.flatten.compact
      hits.empty? ? nil : hits
    when Hash
      result =
        if deep
          hits = obj.map { |k, value|
            k.to_s == key.to_s ? value : filter_resultset(value, key, nil, deep)
          }.flatten.compact
          hits.empty? ? nil : hits
        else
          obj[key.to_s] || obj[key.to_sym]
        end
      result = result[idx.to_i] if result && idx
      result
    end
  end
end
|
194
|
+
|
195
|
+
__END__
|
196
|
+
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
197
|
+
<xsl:output method="xml" encoding="UTF-8"/>
|
198
|
+
<xsl:param name="indent-increment" select="' '"/>
|
199
|
+
|
200
|
+
<xsl:template name="newline">
|
201
|
+
<xsl:text disable-output-escaping="yes">
|
202
|
+
</xsl:text>
|
203
|
+
</xsl:template>
|
204
|
+
|
205
|
+
<xsl:template match="comment() | processing-instruction()">
|
206
|
+
<xsl:param name="indent" select="''"/>
|
207
|
+
<xsl:call-template name="newline"/>
|
208
|
+
<xsl:value-of select="$indent"/>
|
209
|
+
<xsl:copy />
|
210
|
+
</xsl:template>
|
211
|
+
|
212
|
+
<xsl:template match="text()">
|
213
|
+
<xsl:param name="indent" select="''"/>
|
214
|
+
<xsl:call-template name="newline"/>
|
215
|
+
<xsl:value-of select="$indent"/>
|
216
|
+
<xsl:value-of select="normalize-space(.)"/>
|
217
|
+
</xsl:template>
|
218
|
+
|
219
|
+
<xsl:template match="text()[normalize-space(.)='']"/>
|
220
|
+
|
221
|
+
<xsl:template match="*">
|
222
|
+
<xsl:param name="indent" select="''"/>
|
223
|
+
<xsl:call-template name="newline"/>
|
224
|
+
<xsl:value-of select="$indent"/>
|
225
|
+
<xsl:choose>
|
226
|
+
<xsl:when test="count(child::*) > 0">
|
227
|
+
<xsl:copy>
|
228
|
+
<xsl:copy-of select="@*"/>
|
229
|
+
<xsl:apply-templates select="*|text()">
|
230
|
+
<xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
|
231
|
+
</xsl:apply-templates>
|
232
|
+
<xsl:call-template name="newline"/>
|
233
|
+
<xsl:value-of select="$indent"/>
|
234
|
+
</xsl:copy>
|
235
|
+
</xsl:when>
|
236
|
+
<xsl:otherwise>
|
237
|
+
<xsl:copy-of select="."/>
|
238
|
+
</xsl:otherwise>
|
239
|
+
</xsl:choose>
|
240
|
+
</xsl:template>
|
241
|
+
</xsl:stylesheet>
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gorgeous
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- "Mislav Marohni\xC4\x87"
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-02-10 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description:
|
23
|
+
email: mislav.marohnic@gmail.com
|
24
|
+
executables:
|
25
|
+
- gorgeous
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files: []
|
29
|
+
|
30
|
+
files:
|
31
|
+
- bin/gorgeous
|
32
|
+
- lib/gorgeous.rb
|
33
|
+
- README.md
|
34
|
+
has_rdoc: false
|
35
|
+
homepage: http://github.com/mislav/gorgeous
|
36
|
+
licenses: []
|
37
|
+
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options: []
|
40
|
+
|
41
|
+
require_paths:
|
42
|
+
- lib
|
43
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
none: false
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
hash: 3
|
49
|
+
segments:
|
50
|
+
- 0
|
51
|
+
version: "0"
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
requirements: []
|
62
|
+
|
63
|
+
rubyforge_project:
|
64
|
+
rubygems_version: 1.5.0
|
65
|
+
signing_key:
|
66
|
+
specification_version: 3
|
67
|
+
summary: Convert between different data formats
|
68
|
+
test_files: []
|
69
|
+
|