gorgeous 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/README.md +22 -0
  2. data/bin/gorgeous +126 -0
  3. data/lib/gorgeous.rb +241 -0
  4. metadata +69 -0
data/README.md ADDED
@@ -0,0 +1,22 @@
1
+ Usage:
2
+
3
+ gorgeous [-i] [-F <in-format>] [-T <out-format>] [-q <query>] [-o <destination>] FILE ...
4
+
5
+ This utility converts data between different formats.
6
+ Formats are one of: json, xml, yaml, ruby, email, url
7
+
8
+ Input can be read from STDIN as well as files given on the command-line.
9
+
10
+ Options:
11
+
12
+ -i Convert file in-place
13
+ -F Input format
14
+ -T Output format
15
+ -q Query selector in format similar to XPath (see below)
16
+ -o Write output to file instead of STDOUT
17
+
18
+ Query format:
19
+
20
+ "/items/comments[1]/text" -- 2nd comment body of each item
21
+ "/items[-1]/user/full_name" -- name of user for last item
22
+ "//user/username" -- usernames of all users anywhere in the document
data/bin/gorgeous ADDED
@@ -0,0 +1,126 @@
1
#!/usr/bin/env ruby
#
# Command-line front end for the Gorgeous converter.
#
# Reads data from the files named on the command line (or from STDIN),
# optionally narrows it with a query, and writes it out in another format
# to STDOUT, to the `-o` destination, or back over the first input file (`-i`).

help = <<-MSG
Usage: gorgeous [-i] [-F <in-format>] [-T <out-format>] [-q <query>] [-o <destination>] FILE ...

This utility converts data between different formats.
Formats are one of: json, xml, yaml, ruby, email, url

Input can be read from STDIN as well as files given on the command-line.

Options:
-i Convert file in-place
-F Input format
-T Output format
-q Query selector in format similar to XPath (see below)
-o Write output to file instead of STDOUT

Query format:
"/items/comments[1]/text" -- 2nd comment body of each item
"/items[-1]/user/full_name" -- name of user for last item
"//user/username" -- usernames of all users anywhere in the document

MSG

if ARGV.include?('-h') || ARGV.include?('--help')
  puts help
  exit
end

require 'gorgeous'

# The first line of the help text doubles as the option specification.
help_usage = help.split("\n", 2).first

# Derive the accepted flags from the usage line: a flag followed by
# `<placeholder>` takes a value, any other flag is a boolean switch.
# Recognized flags (and their values) are removed from ARGV so that only
# file names remain for ARGF.
options = help_usage.scan(/[^\w-](-[a-z])(?: <(.+?)>)?/i).inject({}) do |opts, (flag, value)|
  if value.nil?
    opts[flag] = true if ARGV.delete(flag)
  elsif (idx = ARGV.index(flag))
    opts[flag] = ARGV[idx + 1]
    # drop the flag and then the value that slid into its position
    2.times { ARGV.delete_at(idx) }
  end
  opts
end

in_place      = options['-i']
query_path    = options['-q']
input_format  = options['-F'] && options['-F'].to_sym
output_format = options['-T'] && options['-T'].to_sym
destination   = options['-o']
from_stdin    = !$stdin.tty?

if from_stdin
  if in_place
    warn "can't edit stdin in place"
    exit 1
  end
elsif in_place && destination
  warn "can't combine -i and -o flags"
  exit 1
elsif ARGV.empty?
  warn help_usage
  exit 1
end

# clobber the input file
destination = ARGV.first if in_place

input = Gorgeous.new(ARGF, :filename => ARGV.first, :format => input_format, :query => query_path)

unless input.format
  warn "unrecognized input format; specify explicit format with `-F <format>`"
  exit 1
end

# From here on use the format the library actually resolved. `-F` may have
# been omitted and the format inferred instead, in which case the raw option
# is nil and the email-specific branches below would never be selected.
input_format = input.format

output_format ||= if destination
  Gorgeous.filename_to_format(destination)
else
  input_format
end

if output_format == :ruby
  require 'pp'
  output = input.data.pretty_inspect
elsif output_format == :json
  require 'yajl/json_gem'
  output = JSON.pretty_generate(input.data)
elsif output_format == :yaml
  require 'yaml'
  output = YAML.dump(input.data)
elsif input_format == :email && output_format == :xml
  # email -> xml pretty-prints the HTML part of the message
  require 'nokogiri'
  email = input.to_mail
  part = email.html_part
  html = Gorgeous.convert_utf8(part.decode_body, part.charset)
  ugly = Nokogiri::HTML(html)
  output = Gorgeous.pretty_xml(ugly)
elsif input_format == :email && output_format == :txt && !input.filtered?
  # email -> txt without a query emits the plain-text part of the message
  email = input.to_mail
  part = email.text_part
  output = Gorgeous.convert_utf8(part.decode_body.strip, part.charset)
elsif output_format == :xml
  output = Gorgeous.pretty_xml(input.to_xml)
elsif output_format == :url
  require 'rack/utils'
  output = Rack::Utils.build_nested_query(input.data)
elsif output_format == :txt
  data = input.data
  output = Array === data ? data.join("\n") : data.to_s
else
  warn "don't know how to convert #{input_format} to #{output_format}"
  exit 1
end

if destination
  begin
    File.open(destination, 'w') { |f| f << output }
  rescue SystemCallError
    # covers a missing directory, lack of permission, destination being a directory, ...
    warn "error opening #{destination} for writing"
    exit 1
  end
else
  begin
    puts output
  rescue Errno::EPIPE
    # ignore "broken pipe" exception
  end
end
data/lib/gorgeous.rb ADDED
@@ -0,0 +1,241 @@
1
# Wraps a piece of raw input (a String or anything responding to `read`) and
# decodes it into plain Ruby data, optionally narrowed by an XPath-like query.
#
# Options understood by the constructor:
#   :format   - explicit input format symbol (:json, :xml, :yaml, :ruby,
#               :email, :url, :http)
#   :filename - name used to infer the format from its extension
#   :query    - query string such as "/items[-1]/user/name" or "//username"
class Gorgeous
  # Pattern for a single query step: "/key", "//key", each optionally
  # followed by an integer index in brackets.
  QUERY_STEP = %r{(//?)(\w+)(?:\[(-?\d+)\])?}

  # Maps a file name to a format symbol based on its extension.
  # Returns nil when the extension is not recognized.
  def self.filename_to_format(filename)
    case File.extname(filename)
    when '.json' then :json
    when '.xml', '.html' then :xml
    when '.rb' then :ruby
    when '.yml', '.yaml' then :yaml
    when '.mail', '.email' then :email
    end
  end

  # Pretty-prints a Nokogiri document by running it through the XSLT
  # stylesheet embedded at the bottom of this file. Returns a String.
  def self.pretty_xml(ugly)
    require 'nokogiri'
    tidy = Nokogiri::XSLT(stylesheet_source)
    tidy.transform(ugly).to_s
  end

  # Text that follows the end-of-program marker in this file.
  #
  # The `DATA` constant is only defined for the main program file, so it is
  # unavailable when this library is loaded with `require`; it is also a
  # stream that can be consumed only once. Reading the file directly and
  # memoizing the result avoids both problems.
  def self.stylesheet_source
    @stylesheet_source ||= File.read(__FILE__).split(/^__END__\r?\n/, 2).last
  end
  private_class_method :stylesheet_source

  # Re-encodes `string` from `from_charset` to UTF-8.
  #
  # The string is returned untouched when the charset is nil or already
  # some spelling of UTF-8. Raises whatever the underlying converter raises
  # for unknown charsets or invalid byte sequences.
  def self.convert_utf8(string, from_charset)
    return string if from_charset.nil? || from_charset.downcase.tr('-', '') == 'utf8'

    if string.respond_to?(:encode)
      # the two-argument form interprets the bytes as `from_charset`
      string.encode('utf-8', from_charset)
    else
      # Rubies without String#encode still ship iconv
      require 'iconv'
      Iconv.conv 'utf-8', from_charset, string
    end
  end

  # Extracts the interesting headers of a Mail message into an ordered hash
  # keyed by symbol. Headers that are absent are left out of the result.
  def self.headers_from_mail(email)
    require 'active_support/ordered_hash'
    require 'active_support/core_ext/object/blank'

    # address headers: a single address as a String, several as an Array
    address_field = lambda { |name|
      field = email.header[name]
      if field
        values = field.addrs.map { |a|
          Mail::Encodings.unquote_and_convert_to(a.format, 'utf-8')
        }
        values.size < 2 ? values.first : values
      end
    }
    # raw header value as a String
    header_value = lambda { |name|
      field = email.header[name]
      field && field.value.to_s
    }
    # header value with MIME encoded-words decoded to UTF-8
    decoded_value = lambda { |name|
      field = email.header[name]
      Mail::Encodings.unquote_and_convert_to(field.value, 'utf-8') if field
    }

    data = ActiveSupport::OrderedHash.new
    data[:subject] = decoded_value.call('subject')
    data[:from] = address_field.call('from')
    data[:to] = address_field.call('to')
    data[:cc] = address_field.call('cc')
    data[:bcc] = address_field.call('bcc')
    data[:reply_to] = address_field.call('reply-to')
    data[:return_path] = email.return_path

    data[:message_id] = email.message_id
    data[:in_reply_to] = email.in_reply_to
    data[:references] = email.references

    data[:date] = email.date
    data[:sender] = address_field.call('sender')
    data[:delivered_to] = header_value.call('delivered-to')
    data[:original_sender] = header_value.call('x-original-sender')
    # keep only the media type, dropping parameters such as the charset
    data[:content_type] = email.content_type.to_s.split(';', 2).first.presence
    data[:precedence] = header_value.call('precedence')

    data.reject! { |_name, value| value.nil? }
    data
  end

  # Parses a raw HTTP response (adapted from webmock).
  #
  # Accepts a String or an object responding to `read`; the latter is read
  # fully and closed when it responds to `close`. Returns a hash with
  # :headers (name => value), :body and :status ([code, message]).
  def self.http_from_string(raw_response)
    require 'net/http'
    require 'stringio'

    if raw_response.respond_to?(:read)
      string = raw_response.read
      raw_response.close if raw_response.respond_to?(:close)
      raw_response = string
    end

    # Net::BufferedIO needs an IO-like object; a bare String cannot be read from
    socket = ::Net::BufferedIO.new(StringIO.new(raw_response))
    response = ::Net::HTTPResponse.read_new(socket)
    transfer_encoding = response.delete('transfer-encoding') # chunks were already read
    response.reading_body(socket, true) {}

    headers = {}
    response.each_header { |name, value| headers[name] = value }
    headers['transfer-encoding'] = transfer_encoding if transfer_encoding

    {
      :headers => headers,
      :body => response.read_body,
      :status => [response.code.to_i, response.message]
    }
  end

  def initialize(input, options = {})
    @input = input
    @format = options[:format]
    @options = options
  end

  # True when a query was supplied and `data` will be narrowed by it.
  def filtered?
    !!@options[:query]
  end

  # The raw input as a String; readable inputs are read once and cached.
  def to_s
    @str ||= @input.respond_to?(:read) ? @input.read : @input
  end

  # The input format: the explicit :format option if given, otherwise
  # derived from the :filename extension, otherwise sniffed from the content.
  # Returns nil when none of these identifies the format.
  def format
    @format ||= begin
      filename = @options[:filename]
      from_name = filename && self.class.filename_to_format(filename)
      # an unknown extension is not a reason to give up; inspect the content
      from_name || guess_format
    end
  end

  # The decoded input (typically a hash or array) with the query applied.
  # Raises ArgumentError when the format is unknown.
  def data
    apply_query(decode)
  end

  # The input parsed as a Nokogiri document.
  def to_xml
    require 'nokogiri'
    Nokogiri(to_s)
  end

  # The input parsed as a Mail message. Leading whitespace is stripped and
  # a trailing newline is ensured before parsing.
  def to_mail
    require 'mail'
    raw = to_s.lstrip
    raw << "\n" unless raw[-1, 1] == "\n"
    Mail.new raw
  end

  private

  # Decodes the raw input according to `format`.
  def decode
    case format
    when :xml
      require 'active_support/core_ext/hash/conversions'
      Hash.from_xml(to_s)
    when :json
      require 'yajl/json_gem'
      JSON.parse to_s
    when :yaml
      require 'yaml'
      # NOTE(review): YAML.load can instantiate arbitrary objects on older
      # Ruby versions; only feed it trusted documents.
      YAML.load to_s
    when :email
      self.class.headers_from_mail to_mail
    when :ruby
      # WARNING: executes the input as Ruby code; never use on untrusted data.
      eval to_s # TODO: sandbox
    when :url
      require 'rack/utils'
      Rack::Utils.parse_nested_query(to_s.strip)
    when :http
      self.class.http_from_string(to_s)[:headers]
    else
      raise ArgumentError, "don't know how to decode #{format}"
    end
  end

  # Sniffs the format from the beginning of the content; nil when unsure.
  def guess_format
    case to_s
    when /\A\s*[\[\{]/ then :json
    when /\A\s*</ then :xml
    when /\A---\s/ then :yaml
    when /\A\S+=\S+\Z/ then :url
    end
  end

  # Applies the query, one step at a time, to the decoded object.
  # Returns the object unchanged when no query was given, and nil as soon
  # as a step matches nothing.
  def apply_query(obj)
    return obj unless filtered?

    query = @options[:query].dup
    # consume the query step by step, always taking the first remaining match
    while query.sub!(QUERY_STEP, '')
      step = Regexp.last_match
      obj = filter_resultset(obj, step[2], step[3], step[1].length == 2)
      break if obj.nil?
    end
    obj
  end

  # Resolves a single query step against `obj`.
  #
  # obj  - Array or Hash to search; anything else yields nil
  # key  - name to look up (string and symbol keys are both tried)
  # idx  - optional index (String or Integer) applied to the matched value
  # deep - when true, search nested hashes and arrays at any depth
  def filter_resultset(obj, key, idx = nil, deep = false)
    case obj
    when Array
      matches = obj.map { |item| filter_resultset(item, key, idx, deep) }
      non_empty(matches.flatten.compact)
    when Hash
      result =
        if deep
          matches = obj.map do |name, value|
            name.to_s == key.to_s ? value : filter_resultset(value, key, nil, deep)
          end
          non_empty(matches.flatten.compact)
        else
          obj[key.to_s] || obj[key.to_sym]
        end
      result = result[idx.to_i] if result && idx
      result
    end
  end

  # Returns the list, or nil when it is empty.
  def non_empty(list)
    list unless list.empty?
  end
end
194
+
195
+ __END__
196
+ <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
197
+ <xsl:output method="xml" encoding="UTF-8"/>
198
+ <xsl:param name="indent-increment" select="' '"/>
199
+
200
+ <xsl:template name="newline">
201
+ <xsl:text disable-output-escaping="yes">
202
+ </xsl:text>
203
+ </xsl:template>
204
+
205
+ <xsl:template match="comment() | processing-instruction()">
206
+ <xsl:param name="indent" select="''"/>
207
+ <xsl:call-template name="newline"/>
208
+ <xsl:value-of select="$indent"/>
209
+ <xsl:copy />
210
+ </xsl:template>
211
+
212
+ <xsl:template match="text()">
213
+ <xsl:param name="indent" select="''"/>
214
+ <xsl:call-template name="newline"/>
215
+ <xsl:value-of select="$indent"/>
216
+ <xsl:value-of select="normalize-space(.)"/>
217
+ </xsl:template>
218
+
219
+ <xsl:template match="text()[normalize-space(.)='']"/>
220
+
221
+ <xsl:template match="*">
222
+ <xsl:param name="indent" select="''"/>
223
+ <xsl:call-template name="newline"/>
224
+ <xsl:value-of select="$indent"/>
225
+ <xsl:choose>
226
+ <xsl:when test="count(child::*) > 0">
227
+ <xsl:copy>
228
+ <xsl:copy-of select="@*"/>
229
+ <xsl:apply-templates select="*|text()">
230
+ <xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
231
+ </xsl:apply-templates>
232
+ <xsl:call-template name="newline"/>
233
+ <xsl:value-of select="$indent"/>
234
+ </xsl:copy>
235
+ </xsl:when>
236
+ <xsl:otherwise>
237
+ <xsl:copy-of select="."/>
238
+ </xsl:otherwise>
239
+ </xsl:choose>
240
+ </xsl:template>
241
+ </xsl:stylesheet>
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gorgeous
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - "Mislav Marohni\xC4\x87"
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-10 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description:
23
+ email: mislav.marohnic@gmail.com
24
+ executables:
25
+ - gorgeous
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - bin/gorgeous
32
+ - lib/gorgeous.rb
33
+ - README.md
34
+ has_rdoc: false
35
+ homepage: http://github.com/mislav/gorgeous
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ none: false
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ hash: 3
49
+ segments:
50
+ - 0
51
+ version: "0"
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.5.0
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: Convert between different data formats
68
+ test_files: []
69
+