rawk 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +72 -26
- data/bin/rawk +5 -4
- data/lib/rawk/rawk.rb +24 -10
- data/rawk.gemspec +1 -1
- data/spec/rawk/rawk_spec.rb +32 -10
- data/spec/rawk/record_spec.rb +58 -0
- metadata +4 -4
- data/spec/rawk/line_spec.rb +0 -37
data/README.rdoc
CHANGED
@@ -10,9 +10,13 @@ Obviously, you can use ruby -e and {other magic}[http://code.joejag.com/2009/usi
|
|
10
10
|
|
11
11
|
== Install
|
12
12
|
|
13
|
-
|
13
|
+
From the command line
|
14
14
|
|
15
|
-
|
15
|
+
gem install rawk
|
16
|
+
|
17
|
+
Using bundler
|
18
|
+
|
19
|
+
gem "rawk", "~> 0.1.2"
|
16
20
|
|
17
21
|
== Example
|
18
22
|
|
@@ -35,7 +39,7 @@ Creates the following output
|
|
35
39
|
|
36
40
|
This can be written using rawk as
|
37
41
|
|
38
|
-
$ ls -ltr |
|
42
|
+
$ ls -ltr | rawk '
|
39
43
|
start {puts "Starting..."}
|
40
44
|
every {|record| puts "#{record.cols[8]} #{record.cols[0]}"}
|
41
45
|
finish {puts "done"} '
|
@@ -64,7 +68,7 @@ Runs before any lines are read from the input stream. Equivalent to a BEGIN con
|
|
64
68
|
|
65
69
|
every {|record| <code>}
|
66
70
|
|
67
|
-
Runs once for each line of input data. Yields an object of type
|
71
|
+
Runs once for each line of input data. Yields an object of type Record (see below)
|
68
72
|
Equivalent to an anonymous block such as awk '{print $1}'
|
69
73
|
|
70
74
|
finish {<code>}
|
@@ -74,7 +78,7 @@ Equivalent to an END condition in awk
|
|
74
78
|
|
75
79
|
You can provide multiple blocks of code for each condition.
|
76
80
|
|
77
|
-
ls -ltr | head -2 |
|
81
|
+
ls -ltr | head -2 | rawk '
|
78
82
|
every {|record| puts 1}
|
79
83
|
every {|record| puts 2} '
|
80
84
|
|
@@ -89,11 +93,11 @@ prints
|
|
89
93
|
|
90
94
|
* Conditional blocks
|
91
95
|
|
92
|
-
==
|
96
|
+
== Records
|
93
97
|
|
94
|
-
every yields an object of type
|
98
|
+
every yields an object of type Record which is a subclass of String that adds a cols method to access columns. The cols method returns an array of column values.
|
95
99
|
|
96
|
-
echo "hello world" |
|
100
|
+
echo "hello world" | rawk 'every do |record|
|
97
101
|
puts "#{record.cols.length} columns: #{record.cols.join(",")}"
|
98
102
|
end'
|
99
103
|
|
@@ -101,19 +105,25 @@ every yields an object of type Line which is subclass of String that adds a cols
|
|
101
105
|
|
102
106
|
Note that cols is aliased to c for convenience
|
103
107
|
|
104
|
-
echo "hello world" |
|
108
|
+
echo "hello world" | rawk 'every do |record|
|
105
109
|
puts record.c[0]
|
106
110
|
end'
|
107
111
|
|
108
112
|
-> hello
|
113
|
+
|
114
|
+
In most cases you will be dealing with a few columns of data so Record provides functions that allow you to access the first 10 columns directly by position name.
|
109
115
|
|
116
|
+
echo hello world from me | rawk 'every {|r| puts "#{r.first} #{r.third}"}'
|
117
|
+
|
118
|
+
-> hello from
|
119
|
+
|
110
120
|
== Functions, classes and other ruby stuff
|
111
121
|
|
112
|
-
You can use ruby as normal
|
122
|
+
You can use ruby as normal. For example...
|
113
123
|
|
114
124
|
Functions
|
115
125
|
|
116
|
-
echo hello world |
|
126
|
+
echo hello world | rawk '
|
117
127
|
def print_first_column(record)
|
118
128
|
puts record.cols.first
|
119
129
|
end
|
@@ -121,7 +131,7 @@ Functions
|
|
121
131
|
|
122
132
|
Classes
|
123
133
|
|
124
|
-
echo hello world |
|
134
|
+
echo hello world | rawk '
|
125
135
|
class Printer
|
126
136
|
def self.print_first(record)
|
127
137
|
puts record.cols.first
|
@@ -133,28 +143,63 @@ Requires and gems
|
|
133
143
|
|
134
144
|
require works as you would expect although rubygems is not required by default.
|
135
145
|
|
136
|
-
echo "ruby" |
|
146
|
+
echo "ruby" | rawk '
|
137
147
|
require "rubygems"
|
138
148
|
require "active_support/all"
|
139
149
|
every {|record| puts record.cols.first.pluralize} '
|
140
150
|
|
141
151
|
-> rubies
|
142
152
|
|
153
|
+
== Variables and Scope
|
154
|
+
|
155
|
+
Variables defined inside the condition blocks (start, every, finish) are local to that block. Create member variables to share state between blocks.
|
156
|
+
|
157
|
+
ls | tail -2 | rawk '
|
158
|
+
start do
|
159
|
+
local = "foo"
|
160
|
+
@shared = "bar"
|
161
|
+
puts "Starting with #{local}"
|
162
|
+
end
|
163
|
+
every {|record| puts "Running with #{@shared}"} '
|
164
|
+
|
165
|
+
-> Starting with foo
|
166
|
+
Running with bar
|
167
|
+
Running with bar
|
168
|
+
|
143
169
|
== Builtins
|
144
170
|
|
145
171
|
rawk provides builtins as member variables. You can change them as you see fit.
|
146
172
|
|
147
173
|
@nr holds the current record number
|
148
|
-
ls -ltr | head -2 |
|
174
|
+
ls -ltr | head -2 | rawk 'every {puts @nr}'
|
149
175
|
|
150
176
|
@fs specifies the field separator applied to each record
|
151
177
|
|
152
|
-
echo "foo.bar" |
|
178
|
+
echo "foo.bar" | rawk '
|
153
179
|
start {@fs="."}
|
154
180
|
every {|record| puts "1: #{record.cols[0]} 2: #{record.cols[1]}"} '
|
155
181
|
|
156
182
|
-> 1: foo 2: bar
|
157
183
|
|
184
|
+
@rs specifies the "record separator" character
|
185
|
+
* Defaults to newline
|
186
|
+
* Note that, unlike awk, @rs can only be set in the start block. It cannot be changed "in flight"
|
187
|
+
|
188
|
+
ksh print -n "foo.bar." | bin/rawk '
|
189
|
+
start {@rs = "."}
|
190
|
+
every {|r| puts r.cols.first} '
|
191
|
+
|
192
|
+
-> foo
|
193
|
+
bar
|
194
|
+
|
195
|
+
NF: Keeps a count of the number of fields in an input record. The last field in the input record can be designated by $NF.
|
196
|
+
* Each Record yielded by the every block has a '.nf' method
|
197
|
+
* $NF can be coded as 'every {|record| record.cols.last}'
|
198
|
+
|
199
|
+
echo "foo bar" | rawk 'every {|record| puts "#{record.nf} fields"}'
|
200
|
+
|
201
|
+
-> 2 fields
|
202
|
+
|
158
203
|
=== Not supported (yet)
|
159
204
|
|
160
205
|
I'm working on support for the following awk built-ins
|
@@ -164,21 +209,11 @@ Contains the name of the current input-file.
|
|
164
209
|
* Reading input data is not supported yet
|
165
210
|
* When I add it, I'll add @filename as a member
|
166
211
|
|
167
|
-
RS:
|
168
|
-
Stores the current "record separator" character. Since, by default, an input line is the input record, the default record separator character is a "newline".
|
169
|
-
* Will be @rs
|
170
|
-
* Currently, records are delimited by newline
|
171
|
-
|
172
212
|
|
173
213
|
=== Redundant
|
174
214
|
|
175
215
|
The following awk built-ins are redundant in ruby
|
176
216
|
|
177
|
-
NF:
|
178
|
-
Keeps a count of the number of fields in an input record. The last field in the input record can be designated by $NF.
|
179
|
-
* NF can be coded as 'every {|record| record.cols.size}'
|
180
|
-
* $NF can be coded as 'every {|record| record.cols.last}'
|
181
|
-
|
182
217
|
OFS:
|
183
218
|
Stores the "output field separator", which separates the fields when Awk prints them. The default is a "space" character.
|
184
219
|
* Ruby's string handling is far superior to awk's so there is no point in implementing a print routine
|
@@ -192,4 +227,15 @@ OFMT: Stores the format for numeric output. The default format is "%.6g".
|
|
192
227
|
|
193
228
|
== Using rawk inside a ruby program
|
194
229
|
|
195
|
-
|
230
|
+
Rawk code is evaluated within an instance of Rawk::Program. You can use rawk within your programs as follows...
|
231
|
+
|
232
|
+
require 'rubygems'
|
233
|
+
require 'rawk'
|
234
|
+
|
235
|
+
data = "foo\nbar"
|
236
|
+
program = Rawk::Program.new(data)
|
237
|
+
|
238
|
+
program.run do
|
239
|
+
every {|record| puts record.cols.first}
|
240
|
+
end
|
241
|
+
|
data/bin/rawk
CHANGED
data/lib/rawk/rawk.rb
CHANGED
@@ -3,26 +3,27 @@ require 'observer'
|
|
3
3
|
|
4
4
|
module Rawk
|
5
5
|
class Program
|
6
|
-
attr_reader :fs
|
6
|
+
attr_reader :fs, :rs
|
7
7
|
|
8
8
|
def initialize(io)
|
9
9
|
@start, @every, @finish = Set.new, Set.new, Set.new
|
10
|
+
initialize_builtins!
|
10
11
|
@input_stream = InputStream.new(io)
|
11
12
|
@input_stream.add_observer(self)
|
12
|
-
initialize_builtins!
|
13
13
|
end
|
14
14
|
|
15
15
|
private
|
16
16
|
def initialize_builtins!
|
17
17
|
@fs = " "
|
18
18
|
@nr = 0
|
19
|
+
@rs = "\n"
|
19
20
|
end
|
20
21
|
|
21
22
|
public
|
22
23
|
def on_new_line
|
23
24
|
@nr += 1
|
24
25
|
end
|
25
|
-
alias :update :on_new_line
|
26
|
+
alias :update :on_new_line # required by Observer
|
26
27
|
|
27
28
|
def run(code = "", &block)
|
28
29
|
load!(code, &block)
|
@@ -53,8 +54,9 @@ module Rawk
|
|
53
54
|
|
54
55
|
def execute_code!
|
55
56
|
@start.each {|b| b.call}
|
56
|
-
@input_stream.each_line do |
|
57
|
-
|
57
|
+
@input_stream.each_line(@rs) do |str|
|
58
|
+
record = Record.new(str, @fs, @rs)
|
59
|
+
@every.each {|b| b.call(record)}
|
58
60
|
end
|
59
61
|
@finish.each {|b| b.call}
|
60
62
|
end
|
@@ -67,8 +69,8 @@ module Rawk
|
|
67
69
|
@io = io
|
68
70
|
end
|
69
71
|
|
70
|
-
def each_line
|
71
|
-
@io.each_line do |line|
|
72
|
+
def each_line(separator)
|
73
|
+
@io.each_line(separator) do |line|
|
72
74
|
changed
|
73
75
|
notify_observers
|
74
76
|
yield line
|
@@ -76,9 +78,15 @@ module Rawk
|
|
76
78
|
end
|
77
79
|
end
|
78
80
|
|
79
|
-
class
|
80
|
-
def
|
81
|
-
|
81
|
+
class Record < String
|
82
|
+
def self.column_postion_accessor(name, position)
|
83
|
+
define_method(name.to_sym) do
|
84
|
+
cols[position]
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def initialize(str, fs, eor = "\n")
|
89
|
+
self.replace(str.chomp(eor))
|
82
90
|
@fs = fs
|
83
91
|
end
|
84
92
|
|
@@ -90,5 +98,11 @@ module Rawk
|
|
90
98
|
def nf
|
91
99
|
cols.length
|
92
100
|
end
|
101
|
+
|
102
|
+
{:first => 0,:second => 1, :third => 2, :fourth => 3,:fifth => 4,
|
103
|
+
:sixth => 5,:seventh => 6,:eighth => 7,:ninth => 8, :tenth => 9
|
104
|
+
}.each do |name, column_position|
|
105
|
+
column_postion_accessor name, column_position
|
106
|
+
end
|
93
107
|
end
|
94
108
|
end
|
data/rawk.gemspec
CHANGED
data/spec/rawk/rawk_spec.rb
CHANGED
@@ -113,34 +113,56 @@ module Rawk
|
|
113
113
|
end
|
114
114
|
|
115
115
|
describe "support for standard awk built-in variables" do
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
116
|
+
describe "@nr (current record)" do
|
117
|
+
it "holds the current record number" do
|
118
|
+
record_nums = []
|
119
|
+
@program.run do
|
120
|
+
start {record_nums << @nr}
|
121
|
+
every {record_nums << @nr}
|
122
|
+
finish {record_nums << @nr}
|
123
|
+
end
|
124
|
+
record_nums.should == [0,1,2,3,3]
|
122
125
|
end
|
123
|
-
record_nums.should == [0,1,2,3,3]
|
124
126
|
end
|
125
127
|
|
126
|
-
describe "fs" do
|
128
|
+
describe "@fs (field separator)" do
|
127
129
|
it "holds the current field separator expression" do
|
128
130
|
@program.fs.should == " "
|
129
131
|
end
|
130
132
|
it "is applied to each line of data" do
|
131
133
|
data = "line"
|
132
|
-
|
134
|
+
Record.should_receive(:new).with(data, " ", "\n")
|
133
135
|
Program.new(data).run {every {|l| nil}}
|
134
136
|
end
|
135
137
|
it "can be changed by the user's program" do
|
136
138
|
data = "line"
|
137
|
-
|
139
|
+
Record.should_receive(:new).with(data, ",", "\n").and_return("dummy")
|
138
140
|
Program.new(data).run do
|
139
141
|
start {@fs = ','}
|
140
142
|
every {|l| nil}
|
141
143
|
end
|
142
144
|
end
|
143
145
|
end
|
146
|
+
|
147
|
+
describe "@rs (record separator)" do
|
148
|
+
it "defaults to a newline" do
|
149
|
+
@program.rs.should == "\n"
|
150
|
+
end
|
151
|
+
it "can be changed by the user" do
|
152
|
+
data = "line1.line2."
|
153
|
+
Record.should_receive(:new).
|
154
|
+
with("line1.", " ", ".").once.ordered.
|
155
|
+
and_return("")
|
156
|
+
Record.should_receive(:new).
|
157
|
+
with("line2.", " ", ".").once.ordered.
|
158
|
+
and_return("")
|
159
|
+
|
160
|
+
Program.new(data).run do
|
161
|
+
start {@rs = "."}
|
162
|
+
every {|rec| nil}
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
144
166
|
end
|
145
167
|
end
|
146
168
|
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Rawk
|
4
|
+
describe Record do
|
5
|
+
before do
|
6
|
+
@space = " "
|
7
|
+
@data = "a b c\n"
|
8
|
+
@record = Record.new(@data, @space)
|
9
|
+
end
|
10
|
+
it "is a string" do
|
11
|
+
@record.is_a?(String).should be_true
|
12
|
+
end
|
13
|
+
it "chomps itself on creation" do
|
14
|
+
@record.should == "a b c"
|
15
|
+
end
|
16
|
+
it "calculates and array of columns" do
|
17
|
+
@record.cols.should == ["a","b","c"]
|
18
|
+
@record.c.should == @record.cols
|
19
|
+
end
|
20
|
+
it "calculates the number of fields" do
|
21
|
+
@record.nf.should == 3
|
22
|
+
end
|
23
|
+
|
24
|
+
context "accessing columns by name" do
|
25
|
+
before do
|
26
|
+
@record = Record.new("a b c d e f g h i j", @space)
|
27
|
+
end
|
28
|
+
|
29
|
+
{ :first => "a", :second => "b", :third => "c",
|
30
|
+
:fourth => "d", :fifth => "e", :sixth => "f",
|
31
|
+
:seventh => "g", :eighth => "h", :ninth => "i",
|
32
|
+
:tenth => "j" }.each do |method, expected|
|
33
|
+
it "finds the #{method} column value" do
|
34
|
+
@record.send(method).should == expected
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context "with a field separator ','" do
|
40
|
+
it "splits the line into columns using a comma" do
|
41
|
+
record = Record.new("a,b,c d", ",")
|
42
|
+
record.c.should == ["a","b","c d"]
|
43
|
+
end
|
44
|
+
end
|
45
|
+
context "with a regular expression field separator" do
|
46
|
+
it "splits the line into columns using the regular expression" do
|
47
|
+
record = Record.new("a,b|c", /[,|]/)
|
48
|
+
record.c.should == ["a","b","c"]
|
49
|
+
end
|
50
|
+
end
|
51
|
+
context "with an explict end of line marker" do
|
52
|
+
it "chomps the end of line marker" do
|
53
|
+
record = Record.new("a b c.", @space, ".")
|
54
|
+
record.should == "a b c"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rawk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-08-
|
12
|
+
date: 2011-08-16 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! 'An awk-inspired ruby DSL
|
15
15
|
|
@@ -27,8 +27,8 @@ files:
|
|
27
27
|
- bin/rawk
|
28
28
|
- lib/rawk/rawk.rb
|
29
29
|
- rawk.gemspec
|
30
|
-
- spec/rawk/line_spec.rb
|
31
30
|
- spec/rawk/rawk_spec.rb
|
31
|
+
- spec/rawk/record_spec.rb
|
32
32
|
- spec/spec_helper.rb
|
33
33
|
homepage: https://github.com/mowat27/rawk
|
34
34
|
licenses: []
|
@@ -55,6 +55,6 @@ signing_key:
|
|
55
55
|
specification_version: 3
|
56
56
|
summary: An awk-inspired ruby DSL
|
57
57
|
test_files:
|
58
|
-
- spec/rawk/line_spec.rb
|
59
58
|
- spec/rawk/rawk_spec.rb
|
59
|
+
- spec/rawk/record_spec.rb
|
60
60
|
- spec/spec_helper.rb
|
data/spec/rawk/line_spec.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
module Rawk
|
4
|
-
describe Line do
|
5
|
-
before do
|
6
|
-
@fs = " "
|
7
|
-
@data = "a b c\n"
|
8
|
-
@line = Line.new(@data, @fs)
|
9
|
-
end
|
10
|
-
it "is a string" do
|
11
|
-
@line.is_a?(String).should be_true
|
12
|
-
end
|
13
|
-
it "chomps itself on creation" do
|
14
|
-
@line.should == "a b c"
|
15
|
-
end
|
16
|
-
it "finds space-delimited columns by" do
|
17
|
-
@line.cols.should == ["a","b","c"]
|
18
|
-
@line.c.should == @line.cols
|
19
|
-
end
|
20
|
-
it "calculates the number of fields" do
|
21
|
-
@line.nf.should == 3
|
22
|
-
end
|
23
|
-
|
24
|
-
context "with a field separator ','" do
|
25
|
-
it "splits the line into columns using a comma" do
|
26
|
-
line = Line.new("a,b,c d", ",")
|
27
|
-
line.c.should == ["a","b","c d"]
|
28
|
-
end
|
29
|
-
end
|
30
|
-
context "with a regular expression field separator" do
|
31
|
-
it "splits the line into columns using the regular expression" do
|
32
|
-
line = Line.new("a,b|c", /[,|]/)
|
33
|
-
line.c.should == ["a","b","c"]
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|