gorgeous 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/README.md +22 -0
  2. data/bin/gorgeous +126 -0
  3. data/lib/gorgeous.rb +241 -0
  4. metadata +69 -0
data/README.md ADDED
@@ -0,0 +1,22 @@
1
+ Usage:
2
+
3
+ gorgeous [-i] [-F <in-format>] [-T <out-format>] [-q <query>] [-o <destination>] FILE ...
4
+
5
+ This utility converts data between different formats.
6
+ Formats are one of: json, xml, yaml, ruby, email, url
7
+
8
+ Input can be read from STDIN as well as files given on the command-line.
9
+
10
+ Options:
11
+
12
+ -i Convert file in-place
13
+ -F Input format
14
+ -T Output format
15
+ -q Query selector in format similar to XPath (see below)
16
+ -o Write output to file instead of STDOUT
17
+
18
+ Query format:
19
+
20
+ "/items/comments[1]/text" -- 2nd comment body of each item
21
+ "/items[-1]/user/full_name" -- name of user for last item
22
+ "//user/username" -- usernames of all users anywhere in the document
data/bin/gorgeous ADDED
@@ -0,0 +1,126 @@
1
#!/usr/bin/env ruby
#
# Command-line front end for the Gorgeous converter.
#
# Reads input from the files named on the command line (or from STDIN),
# decodes it with Gorgeous, optionally narrows it with a query, and writes
# the result in the requested output format to STDOUT or to a file.

help = <<-MSG
Usage: gorgeous [-i] [-F <in-format>] [-T <out-format>] [-q <query>] [-o <destination>] FILE ...

This utility converts data between different formats.
Formats are one of: json, xml, yaml, ruby, email, url

Input can be read from STDIN as well as files given on the command-line.

Options:
-i Convert file in-place
-F Input format
-T Output format
-q Query selector in format similar to XPath (see below)
-o Write output to file instead of STDOUT

Query format:
"/items/comments[1]/text" -- 2nd comment body of each item
"/items[-1]/user/full_name" -- name of user for last item
"//user/username" -- usernames of all users anywhere in the document

MSG

# Answer help requests before loading the library.
if ARGV.index('-h') || ARGV.index('--help')
  puts help
  exit
end

require 'gorgeous'

# First line of the help text; doubles as the short usage message.
help_usage = help.split("\n", 2).first

# The accepted flags are derived from the usage line itself: "[-i]" is a
# boolean switch, "[-F <in-format>]" is a flag that consumes the next
# argument. Recognized flags (and their values) are removed from ARGV so that
# only file names remain for ARGF.
options = help_usage.scan(/[^\w-](-[a-z])(?: <(.+?)>)?/i).inject({}) do |opts, (flag, value)|
  if value.nil?
    opts[flag] = true if ARGV.delete flag
  elsif idx = ARGV.index(flag)
    opts[flag] = ARGV[idx + 1]
    # drop the flag and then the value that slid into its position
    2.times { ARGV.delete_at idx }
  end
  opts
end

in_place      = options['-i']
query_path    = options['-q']
input_format  = options['-F'] && options['-F'].to_sym
output_format = options['-T'] && options['-T'].to_sym
destination   = options['-o']
from_stdin    = !$stdin.tty?

if from_stdin
  if in_place
    warn "can't edit stdin in place"
    exit 1
  end
elsif in_place && destination
  warn "can't combine -i and -o flags"
  exit 1
elsif ARGV.empty?
  warn help_usage
  exit 1
end

# clobber the input file
destination = ARGV.first if in_place

input = Gorgeous.new(ARGF, :filename => ARGV.first, :format => input_format, :query => query_path)

unless input.format
  warn "unrecognized input format; specify explicit format with `-F <format>`"
  exit 1
end

# From here on use the format Gorgeous actually resolved (explicit -F value,
# file extension or content sniffing). Relying on the raw -F option alone
# made the e-mail branches below unreachable for "message.email" inputs
# converted without -F, and left the error message without a source format.
input_format = input.format

output_format ||= if destination
  Gorgeous.filename_to_format(destination)
else
  input_format
end

if output_format == :ruby
  require 'pp'
  output = input.data.pretty_inspect
elsif output_format == :json
  require 'yajl/json_gem'
  output = JSON.pretty_generate input.data
elsif output_format == :yaml
  require 'yaml'
  output = YAML.dump input.data
elsif input_format == :email && output_format == :xml
  # e-mail to XML: pretty-print the HTML part of the message
  require 'nokogiri'
  email = input.to_mail
  part = email.html_part
  html = Gorgeous.convert_utf8(part.decode_body, part.charset)
  ugly = Nokogiri::HTML html
  output = Gorgeous.pretty_xml(ugly)
elsif input_format == :email && output_format == :txt && !input.filtered?
  # e-mail to text without a query: emit the plain-text part of the message
  email = input.to_mail
  part = email.text_part
  output = Gorgeous.convert_utf8(part.decode_body.strip, part.charset)
elsif output_format == :xml
  output = Gorgeous.pretty_xml(input.to_xml)
elsif output_format == :url
  require 'rack/utils'
  output = Rack::Utils.build_nested_query(input.data)
elsif output_format == :txt
  data = input.data
  output = Array === data ? data.join("\n") : data.to_s
else
  warn "don't know how to convert #{input_format} to #{output_format}"
  exit 1
end

if destination
  begin
    File.open(destination, 'w') { |f| f << output }
  rescue Errno::ENOENT
    warn "error opening #{destination} for writing"
    exit 1
  end
else
  begin
    puts output
  rescue Errno::EPIPE
    # ignore "broken pipe" exception
  end
end
data/lib/gorgeous.rb ADDED
@@ -0,0 +1,241 @@
1
# Decodes structured input (JSON, XML, YAML, Ruby literals, e-mail, URL query
# strings, raw HTTP responses) into plain Ruby data and optionally narrows the
# result with an XPath-like query.
#
# Third-party libraries (nokogiri, mail, yajl, rack, active_support) are
# required lazily, only by the code path that needs them.
class Gorgeous
  # Maps a file name to a format symbol based on its extension.
  #
  # filename - String path or name; only the extension is inspected.
  #
  # Returns :json, :xml, :ruby, :yaml or :email, or nil for other extensions.
  def self.filename_to_format(filename)
    case File.extname(filename)
    when '.json' then :json
    when '.xml', '.html' then :xml
    when '.rb' then :ruby
    when '.yml', '.yaml' then :yaml
    when '.mail', '.email' then :email
    end
  end

  # Re-indents an XML/HTML document using the XSLT stylesheet stored after the
  # __END__ marker of this file.
  #
  # ugly - a parsed Nokogiri document.
  #
  # Returns the pretty-printed document as a String.
  def self.pretty_xml(ugly)
    require 'nokogiri'
    tidy = Nokogiri::XSLT(pretty_xml_stylesheet)
    tidy.transform(ugly).to_s
  end

  # Returns the XSLT source embedded at the end of this file.
  #
  # The DATA constant cannot be used for this: Ruby defines it only for the
  # main program file, not for a library loaded with `require`, and it is a
  # stream that would be exhausted after the first read. The text is read
  # from this file directly and memoized instead.
  def self.pretty_xml_stylesheet
    @pretty_xml_stylesheet ||= begin
      _code, stylesheet = File.read(__FILE__).split(/^__END__\r?\n/, 2)
      stylesheet or raise "XSLT stylesheet not found after the end marker of #{__FILE__}"
    end
  end
  private_class_method :pretty_xml_stylesheet

  # Transcodes a string to UTF-8.
  #
  # string       - the text to convert.
  # from_charset - name of the charset the bytes of `string` are in; nil or
  #                any spelling of "utf-8" returns the string untouched.
  #
  # Returns the converted String.
  def self.convert_utf8(string, from_charset)
    return string if from_charset.nil? || from_charset.downcase.tr('-', '') == 'utf8'

    if string.respond_to?(:encode)
      # Ruby 1.9+; iconv is no longer part of the standard library since 2.0
      string.encode('utf-8', from_charset)
    else
      require 'iconv'
      Iconv.conv 'utf-8', from_charset, string
    end
  end

  # Extracts a selection of headers from a parsed e-mail.
  #
  # email - a message object as returned by `Mail.new`.
  #
  # Returns an ordered hash keyed by symbol; headers without a value are
  # left out.
  def self.headers_from_mail(email)
    require 'active_support/ordered_hash'
    require 'active_support/core_ext/object/blank'

    # address headers: decoded addresses; a single address is returned bare
    address_field = lambda { |name|
      if field = email.header[name]
        values = field.addrs.map { |a|
          Mail::Encodings.unquote_and_convert_to(a.format, 'utf-8')
        }
        values.size < 2 ? values.first : values
      end
    }
    # raw header value as a string
    header_value = lambda { |name|
      (field = email.header[name]) && field.value.to_s
    }
    # header value with MIME encoded-words decoded to UTF-8
    decoded_value = lambda { |name|
      field = email.header[name]
      Mail::Encodings.unquote_and_convert_to(field.value, 'utf-8') if field
    }

    data = ActiveSupport::OrderedHash.new
    data[:subject] = decoded_value['subject']
    data[:from] = address_field['from']
    data[:to] = address_field['to']
    data[:cc] = address_field['cc']
    data[:bcc] = address_field['bcc']
    data[:reply_to] = address_field['reply-to']
    data[:return_path] = email.return_path

    data[:message_id] = email.message_id
    data[:in_reply_to] = email.in_reply_to
    data[:references] = email.references

    data[:date] = email.date
    data[:sender] = address_field['sender']
    data[:delivered_to] = header_value['delivered-to']
    data[:original_sender] = header_value['x-original-sender']
    # keep only the media type, dropping parameters after the first ";"
    data[:content_type] = email.content_type.to_s.split(';', 2).first.presence
    data[:precedence] = header_value['precedence']

    data.tap { |hash| hash.reject! { |k, v| v.nil? } }
  end

  # Parses a raw HTTP response (status line, headers, body). Adapted from
  # webmock.
  #
  # raw_response - a String, or an IO which is read completely and closed.
  #
  # Returns a Hash with :headers (lower-cased names to values), :body and
  # :status ([code, message]).
  def self.http_from_string(raw_response)
    require 'net/http'
    require 'stringio'

    if raw_response.is_a?(IO)
      string = raw_response.read
      raw_response.close
      raw_response = string
    end
    # Net::BufferedIO reads from an IO-like object; a bare String has to be
    # wrapped (webmock patches BufferedIO to do this, plain net/http does not)
    raw_response = StringIO.new(raw_response) if raw_response.is_a?(String)

    socket = ::Net::BufferedIO.new(raw_response)
    response = ::Net::HTTPResponse.read_new(socket)
    transfer_encoding = response.delete('transfer-encoding') # chunks were already read
    response.reading_body(socket, true) {}

    options = {}
    options[:headers] = {}
    response.each_header { |name, value| options[:headers][name] = value }
    options[:headers]['transfer-encoding'] = transfer_encoding if transfer_encoding
    options[:body] = response.read_body
    options[:status] = [response.code.to_i, response.message]
    options
  end

  # input   - a String, or any object responding to `read` (IO, ARGF, ...).
  # options - Hash of settings:
  #           :format   - explicit input format symbol (skips detection)
  #           :filename - name used to detect the format by extension
  #           :query    - XPath-like selector applied by `data`
  def initialize(input, options = {})
    @input = input
    @format = options[:format]
    @options = options
  end

  # Returns true when a query was given.
  def filtered?
    !!@options[:query]
  end

  # Returns the raw input text; a readable input is consumed once and cached.
  def to_s
    @str ||= @input.respond_to?(:read) ? @input.read : @input
  end

  # Returns the input format: the explicit :format option, else the format
  # implied by the :filename extension, else a guess based on the content.
  # Returns nil when none of these yields a format.
  def format
    @format ||= begin
      filename = @options[:filename]
      by_name = filename && self.class.filename_to_format(filename)
      # an unknown extension should not prevent sniffing the content
      by_name || guess_format
    end
  end

  # Decodes the input according to `format` and applies the query, if any.
  #
  # Returns the decoded data, typically a hash or array.
  # Raises ArgumentError when the format cannot be decoded.
  def data
    decoded =
      case format
      when :xml
        require 'active_support/core_ext/hash/conversions'
        Hash.from_xml(to_s)
      when :json
        require 'yajl/json_gem'
        JSON.parse to_s
      when :yaml
        require 'yaml'
        # NOTE(review): on older Ruby/Psych versions YAML.load can build
        # arbitrary objects; only feed it trusted input.
        YAML.load to_s
      when :email
        self.class.headers_from_mail to_mail
      when :ruby
        # SECURITY: executes the input as Ruby code; trusted input only.
        eval to_s # TODO: sandbox
      when :url
        require 'rack/utils'
        Rack::Utils.parse_nested_query(to_s.strip)
      when :http
        require 'net/http'
        self.class.http_from_string(to_s)[:headers]
      else
        raise ArgumentError, "don't know how to decode #{format}"
      end
    apply_query(decoded)
  end

  # Returns the input parsed by Nokogiri.
  def to_xml
    require 'nokogiri'
    Nokogiri to_s
  end

  # Returns the input parsed as an e-mail message. Leading whitespace is
  # stripped and a trailing newline is ensured before parsing.
  def to_mail
    require 'mail'
    raw = to_s.lstrip
    raw << "\n" unless raw[-1, 1] == "\n"
    Mail.new raw
  end

  private

  # Guesses the format from the first characters of the input.
  # Returns :json, :xml, :yaml, :url or nil.
  def guess_format
    case to_s
    when /\A\s*[\[\{]/ then :json
    when /\A\s*</ then :xml
    when /\A---\s/ then :yaml
    when /\A\S+=\S+\Z/ then :url
    end
  end

  # Applies the query segment by segment. Each segment is "/key", "//key"
  # (search at any depth) or either form followed by "[index]". Processing
  # stops as soon as a segment selects nothing.
  #
  # Returns the selected data, nil when nothing matched, or `obj` itself when
  # no query was given.
  def apply_query(obj)
    if filtered?
      query = @options[:query].dup
      # consume one leading segment per iteration
      while query.sub!(%r{(//?)(\w+)(?:\[(-?\d+)\])?}, '')
        slashes = Regexp.last_match(1)
        key = Regexp.last_match(2)
        idx = Regexp.last_match(3)
        obj = filter_resultset(obj, key, idx, slashes.length == 2)
        break if obj.nil?
      end
    end
    obj
  end

  # Selects `key` from a hash, or from every element of an array.
  #
  # obj  - Hash or Array to search; anything else selects nothing.
  # key  - key name; string and symbol keys both match.
  # idx  - optional index (String or Integer) applied to the selected value.
  # deep - when true, hashes are searched recursively for the key.
  #
  # Returns the selected value(s) or nil.
  def filter_resultset(obj, key, idx = nil, deep = false)
    if Array === obj
      found = obj.map { |o| filter_resultset(o, key, idx, deep) }
      non_empty(found.flatten.compact)
    elsif Hash === obj
      result =
        if deep
          found = obj.map do |k, value|
            k.to_s == key.to_s ? value : filter_resultset(value, key, nil, deep)
          end
          non_empty(found.flatten.compact)
        else
          obj[key.to_s] || obj[key.to_sym]
        end
      result = result[idx.to_i] if result && idx
      result
    end
  end

  # Returns `list`, or nil when it is empty. Stands in for ActiveSupport's
  # `presence` so that queries work without that library installed.
  def non_empty(list)
    list unless list.empty?
  end
end
194
+
195
+ __END__
196
<!-- Pretty-printing stylesheet: copies the document, putting each node on its own line with indentation that grows with nesting depth. -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" encoding="UTF-8"/>
<!-- string appended to the current indent for each nesting level -->
<xsl:param name="indent-increment" select="' '"/>

<!-- emits a line break; the text node below spans the line ending on purpose -->
<xsl:template name="newline">
<xsl:text disable-output-escaping="yes">
</xsl:text>
</xsl:template>

<!-- comments and processing instructions: new line, current indent, then the node itself -->
<xsl:template match="comment() | processing-instruction()">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:copy />
</xsl:template>

<!-- text: new line, current indent, then the text with whitespace normalized -->
<xsl:template match="text()">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:value-of select="normalize-space(.)"/>
</xsl:template>

<!-- whitespace-only text nodes produce no output -->
<xsl:template match="text()[normalize-space(.)='']"/>

<!-- elements: those with child elements are rebuilt recursively with a deeper indent; all others are copied verbatim -->
<xsl:template match="*">
<xsl:param name="indent" select="''"/>
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
<xsl:choose>
<xsl:when test="count(child::*) > 0">
<xsl:copy>
<xsl:copy-of select="@*"/>
<!-- only child elements and text are selected here -->
<xsl:apply-templates select="*|text()">
<xsl:with-param name="indent" select="concat ($indent, $indent-increment)"/>
</xsl:apply-templates>
<!-- put the closing tag on its own line at the element's indent -->
<xsl:call-template name="newline"/>
<xsl:value-of select="$indent"/>
</xsl:copy>
</xsl:when>
<xsl:otherwise>
<xsl:copy-of select="."/>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
</xsl:stylesheet>
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gorgeous
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - "Mislav Marohni\xC4\x87"
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-10 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description:
23
+ email: mislav.marohnic@gmail.com
24
+ executables:
25
+ - gorgeous
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - bin/gorgeous
32
+ - lib/gorgeous.rb
33
+ - README.md
34
+ has_rdoc: false
35
+ homepage: http://github.com/mislav/gorgeous
36
+ licenses: []
37
+
38
+ post_install_message:
39
+ rdoc_options: []
40
+
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ none: false
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ hash: 3
49
+ segments:
50
+ - 0
51
+ version: "0"
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ hash: 3
58
+ segments:
59
+ - 0
60
+ version: "0"
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.5.0
65
+ signing_key:
66
+ specification_version: 3
67
+ summary: Convert between different data formats
68
+ test_files: []
69
+