rawk 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +72 -26
- data/bin/rawk +5 -4
- data/lib/rawk/rawk.rb +24 -10
- data/rawk.gemspec +1 -1
- data/spec/rawk/rawk_spec.rb +32 -10
- data/spec/rawk/record_spec.rb +58 -0
- metadata +4 -4
- data/spec/rawk/line_spec.rb +0 -37
data/README.rdoc
CHANGED
@@ -10,9 +10,13 @@ Obviously, you can use ruby -e and {other magic}[http://code.joejag.com/2009/usi
|
|
10
10
|
|
11
11
|
== Install
|
12
12
|
|
13
|
-
|
13
|
+
From the command line
|
14
14
|
|
15
|
-
|
15
|
+
gem install rawk
|
16
|
+
|
17
|
+
Using bundler
|
18
|
+
|
19
|
+
gem "rawk", "~> 0.1.2"
|
16
20
|
|
17
21
|
== Example
|
18
22
|
|
@@ -35,7 +39,7 @@ Creates the following output
|
|
35
39
|
|
36
40
|
This can be written using rawk as
|
37
41
|
|
38
|
-
$ ls -ltr |
|
42
|
+
$ ls -ltr | rawk '
|
39
43
|
start {puts "Starting..."}
|
40
44
|
every {|record| puts "#{record.cols[8]} #{record.cols[0]}"}
|
41
45
|
finish {puts "done"} '
|
@@ -64,7 +68,7 @@ Runs before any lines are read from the input stream. Equivalent to a BEGIN con
|
|
64
68
|
|
65
69
|
every {|record| <code>}
|
66
70
|
|
67
|
-
Runs once for each line of input data. Yields an object of type
|
71
|
+
Runs once for each line of input data. Yields an object of type Record (see below)
|
68
72
|
Equivalent to an anonymous block such as awk '{print $1}'
|
69
73
|
|
70
74
|
finish {<code>}
|
@@ -74,7 +78,7 @@ Equivalent to an END condition in awk
|
|
74
78
|
|
75
79
|
You can provide multiple blocks of code for each condition.
|
76
80
|
|
77
|
-
ls -ltr | head -2 |
|
81
|
+
ls -ltr | head -2 | rawk '
|
78
82
|
every {|record| puts 1}
|
79
83
|
every {|record| puts 2} '
|
80
84
|
|
@@ -89,11 +93,11 @@ prints
|
|
89
93
|
|
90
94
|
* Conditional blocks
|
91
95
|
|
92
|
-
==
|
96
|
+
== Records
|
93
97
|
|
94
|
-
every yields an object of type
|
98
|
+
every yields an object of type Record, which is a subclass of String that adds a cols method to access columns. The cols method returns an array of column values.
|
95
99
|
|
96
|
-
echo "hello world" |
|
100
|
+
echo "hello world" | rawk 'every do |record|
|
97
101
|
puts "#{record.cols.length} columns: #{record.cols.join(",")}"
|
98
102
|
end'
|
99
103
|
|
@@ -101,19 +105,25 @@ every yields an object of type Line which is subclass of String that adds a cols
|
|
101
105
|
|
102
106
|
Note that cols is aliased to c for convenience
|
103
107
|
|
104
|
-
echo "hello world" |
|
108
|
+
echo "hello world" | rawk 'every do |record|
|
105
109
|
puts record.c[0]
|
106
110
|
end'
|
107
111
|
|
108
112
|
-> hello
|
113
|
+
|
114
|
+
In most cases you will be dealing with a few columns of data so Record provides functions that allow you to access the first 10 columns directly by position name.
|
109
115
|
|
116
|
+
echo hello world from me | rawk 'every {|r| puts "#{r.first} #{r.third}"}'
|
117
|
+
|
118
|
+
-> hello from
|
119
|
+
|
110
120
|
== Functions, classes and other ruby stuff
|
111
121
|
|
112
|
-
You can use ruby as normal
|
122
|
+
You can use ruby as normal. For example...
|
113
123
|
|
114
124
|
Functions
|
115
125
|
|
116
|
-
echo hello world |
|
126
|
+
echo hello world | rawk '
|
117
127
|
def print_first_column(record)
|
118
128
|
puts record.cols.first
|
119
129
|
end
|
@@ -121,7 +131,7 @@ Functions
|
|
121
131
|
|
122
132
|
Classes
|
123
133
|
|
124
|
-
echo hello world |
|
134
|
+
echo hello world | rawk '
|
125
135
|
class Printer
|
126
136
|
def self.print_first(record)
|
127
137
|
puts record.cols.first
|
@@ -133,28 +143,63 @@ Requires and gems
|
|
133
143
|
|
134
144
|
require works as you would expect although rubygems is not required by default.
|
135
145
|
|
136
|
-
echo "ruby" |
|
146
|
+
echo "ruby" | rawk '
|
137
147
|
require "rubygems"
|
138
148
|
require "active_support/all"
|
139
149
|
every {|record| puts record.cols.first.pluralize} '
|
140
150
|
|
141
151
|
-> rubies
|
142
152
|
|
153
|
+
== Variables and Scope
|
154
|
+
|
155
|
+
Variables defined inside the condition blocks (start, every, finish) are local to that block. Create member variables to share state between blocks.
|
156
|
+
|
157
|
+
ls | tail -2 | rawk '
|
158
|
+
start do
|
159
|
+
local = "foo"
|
160
|
+
@shared = "bar"
|
161
|
+
puts "Starting with #{local}"
|
162
|
+
end
|
163
|
+
every {|record| puts "Running with #{@shared}"} '
|
164
|
+
|
165
|
+
-> Starting with foo
|
166
|
+
Running with bar
|
167
|
+
Running with bar
|
168
|
+
|
143
169
|
== Builtins
|
144
170
|
|
145
171
|
rawk provides builtins as member variables. You can change them as you see fit.
|
146
172
|
|
147
173
|
@nr holds the current record number
|
148
|
-
ls -ltr | head -2 |
|
174
|
+
ls -ltr | head -2 | rawk 'every {puts @nr}'
|
149
175
|
|
150
176
|
@fs specifies the field separator applied to each record
|
151
177
|
|
152
|
-
echo "foo.bar" |
|
178
|
+
echo "foo.bar" | rawk '
|
153
179
|
start {@fs="."}
|
154
180
|
every {|record| puts "1: #{record.cols[0]} 2: #{record.cols[1]}"} '
|
155
181
|
|
156
182
|
-> 1: foo 2: bar
|
157
183
|
|
184
|
+
@rs specifies the "record separator" character
|
185
|
+
* Defaults to newline
|
186
|
+
* Note that, unlike awk, @rs can only be set in the start block. It cannot be changed "in flight"
|
187
|
+
|
188
|
+
ksh print -n "foo.bar." | bin/rawk '
|
189
|
+
start {@rs = "."}
|
190
|
+
every {|r| puts r.cols.first} '
|
191
|
+
|
192
|
+
-> foo
|
193
|
+
bar
|
194
|
+
|
195
|
+
NF: Keeps a count of the number of fields in an input record. The last field in the input record can be designated by $NF.
|
196
|
+
* Each Record yielded by the every block has a '.nf' method
|
197
|
+
* $NF can be coded as 'every {|record| record.cols.last}'
|
198
|
+
|
199
|
+
echo "foo bar" | rawk 'every {|record| puts "#{record.nf} fields"}'
|
200
|
+
|
201
|
+
-> 2 fields
|
202
|
+
|
158
203
|
=== Not supported (yet)
|
159
204
|
|
160
205
|
I'm working on support for the following awk built-ins
|
@@ -164,21 +209,11 @@ Contains the name of the current input-file.
|
|
164
209
|
* Reading input data is not supported yet
|
165
210
|
* When I add it, I'll add @filename as a member
|
166
211
|
|
167
|
-
RS:
|
168
|
-
Stores the current "record separator" character. Since, by default, an input line is the input record, the default record separator character is a "newline".
|
169
|
-
* Will be @rs
|
170
|
-
* Currently, records are delimited by newline
|
171
|
-
|
172
212
|
|
173
213
|
=== Redundant
|
174
214
|
|
175
215
|
The following awk built-ins are redundant in ruby
|
176
216
|
|
177
|
-
NF:
|
178
|
-
Keeps a count of the number of fields in an input record. The last field in the input record can be designated by $NF.
|
179
|
-
* NF can be coded as 'every {|record| record.cols.size}'
|
180
|
-
* $NF can be coded as 'every {|record| record.cols.last}'
|
181
|
-
|
182
217
|
OFS:
|
183
218
|
Stores the "output field separator", which separates the fields when Awk prints them. The default is a "space" character.
|
184
219
|
* Ruby's string handling is far superior to awk's so there is no point in implementing a print routine
|
@@ -192,4 +227,15 @@ OFMT: Stores the format for numeric output. The default format is "%.6g".
|
|
192
227
|
|
193
228
|
== Using rawk inside a ruby program
|
194
229
|
|
195
|
-
|
230
|
+
Rawk code is evaluated within an instance of Rawk::Program. You can use rawk within your programs as follows...
|
231
|
+
|
232
|
+
require 'rubygems'
|
233
|
+
require 'rawk'
|
234
|
+
|
235
|
+
data = "foo\nbar"
|
236
|
+
program = Rawk::Program.new(data)
|
237
|
+
|
238
|
+
program.run do
|
239
|
+
every {|record| puts record.cols.first}
|
240
|
+
end
|
241
|
+
|
data/bin/rawk
CHANGED
data/lib/rawk/rawk.rb
CHANGED
@@ -3,26 +3,27 @@ require 'observer'
|
|
3
3
|
|
4
4
|
module Rawk
|
5
5
|
class Program
|
6
|
-
attr_reader :fs
|
6
|
+
attr_reader :fs, :rs
|
7
7
|
|
8
8
|
def initialize(io)
|
9
9
|
@start, @every, @finish = Set.new, Set.new, Set.new
|
10
|
+
initialize_builtins!
|
10
11
|
@input_stream = InputStream.new(io)
|
11
12
|
@input_stream.add_observer(self)
|
12
|
-
initialize_builtins!
|
13
13
|
end
|
14
14
|
|
15
15
|
private
|
16
16
|
def initialize_builtins!
|
17
17
|
@fs = " "
|
18
18
|
@nr = 0
|
19
|
+
@rs = "\n"
|
19
20
|
end
|
20
21
|
|
21
22
|
public
|
22
23
|
def on_new_line
|
23
24
|
@nr += 1
|
24
25
|
end
|
25
|
-
alias :update :on_new_line
|
26
|
+
alias :update :on_new_line # required by Observer
|
26
27
|
|
27
28
|
def run(code = "", &block)
|
28
29
|
load!(code, &block)
|
@@ -53,8 +54,9 @@ module Rawk
|
|
53
54
|
|
54
55
|
def execute_code!
|
55
56
|
@start.each {|b| b.call}
|
56
|
-
@input_stream.each_line do |
|
57
|
-
|
57
|
+
@input_stream.each_line(@rs) do |str|
|
58
|
+
record = Record.new(str, @fs, @rs)
|
59
|
+
@every.each {|b| b.call(record)}
|
58
60
|
end
|
59
61
|
@finish.each {|b| b.call}
|
60
62
|
end
|
@@ -67,8 +69,8 @@ module Rawk
|
|
67
69
|
@io = io
|
68
70
|
end
|
69
71
|
|
70
|
-
def each_line
|
71
|
-
@io.each_line do |line|
|
72
|
+
def each_line(separator)
|
73
|
+
@io.each_line(separator) do |line|
|
72
74
|
changed
|
73
75
|
notify_observers
|
74
76
|
yield line
|
@@ -76,9 +78,15 @@ module Rawk
|
|
76
78
|
end
|
77
79
|
end
|
78
80
|
|
79
|
-
class
|
80
|
-
def
|
81
|
-
|
81
|
+
class Record < String
|
82
|
+
def self.column_postion_accessor(name, position)
|
83
|
+
define_method(name.to_sym) do
|
84
|
+
cols[position]
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def initialize(str, fs, eor = "\n")
|
89
|
+
self.replace(str.chomp(eor))
|
82
90
|
@fs = fs
|
83
91
|
end
|
84
92
|
|
@@ -90,5 +98,11 @@ module Rawk
|
|
90
98
|
def nf
|
91
99
|
cols.length
|
92
100
|
end
|
101
|
+
|
102
|
+
{:first => 0,:second => 1, :third => 2, :fourth => 3,:fifth => 4,
|
103
|
+
:sixth => 5,:seventh => 6,:eighth => 7,:ninth => 8, :tenth => 9
|
104
|
+
}.each do |name, column_position|
|
105
|
+
column_postion_accessor name, column_position
|
106
|
+
end
|
93
107
|
end
|
94
108
|
end
|
data/rawk.gemspec
CHANGED
data/spec/rawk/rawk_spec.rb
CHANGED
@@ -113,34 +113,56 @@ module Rawk
|
|
113
113
|
end
|
114
114
|
|
115
115
|
describe "support for standard awk built-in variables" do
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
116
|
+
describe "@nr (current record)" do
|
117
|
+
it "holds the current record number" do
|
118
|
+
record_nums = []
|
119
|
+
@program.run do
|
120
|
+
start {record_nums << @nr}
|
121
|
+
every {record_nums << @nr}
|
122
|
+
finish {record_nums << @nr}
|
123
|
+
end
|
124
|
+
record_nums.should == [0,1,2,3,3]
|
122
125
|
end
|
123
|
-
record_nums.should == [0,1,2,3,3]
|
124
126
|
end
|
125
127
|
|
126
|
-
describe "fs" do
|
128
|
+
describe "@fs (field separator)" do
|
127
129
|
it "holds the current field separator expression" do
|
128
130
|
@program.fs.should == " "
|
129
131
|
end
|
130
132
|
it "is applied to each line of data" do
|
131
133
|
data = "line"
|
132
|
-
|
134
|
+
Record.should_receive(:new).with(data, " ", "\n")
|
133
135
|
Program.new(data).run {every {|l| nil}}
|
134
136
|
end
|
135
137
|
it "can be changed by the user's program" do
|
136
138
|
data = "line"
|
137
|
-
|
139
|
+
Record.should_receive(:new).with(data, ",", "\n").and_return("dummy")
|
138
140
|
Program.new(data).run do
|
139
141
|
start {@fs = ','}
|
140
142
|
every {|l| nil}
|
141
143
|
end
|
142
144
|
end
|
143
145
|
end
|
146
|
+
|
147
|
+
describe "@rs (record separator)" do
|
148
|
+
it "defaults to a newline" do
|
149
|
+
@program.rs.should == "\n"
|
150
|
+
end
|
151
|
+
it "can be changed by the user" do
|
152
|
+
data = "line1.line2."
|
153
|
+
Record.should_receive(:new).
|
154
|
+
with("line1.", " ", ".").once.ordered.
|
155
|
+
and_return("")
|
156
|
+
Record.should_receive(:new).
|
157
|
+
with("line2.", " ", ".").once.ordered.
|
158
|
+
and_return("")
|
159
|
+
|
160
|
+
Program.new(data).run do
|
161
|
+
start {@rs = "."}
|
162
|
+
every {|rec| nil}
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
144
166
|
end
|
145
167
|
end
|
146
168
|
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Rawk
|
4
|
+
describe Record do
|
5
|
+
before do
|
6
|
+
@space = " "
|
7
|
+
@data = "a b c\n"
|
8
|
+
@record = Record.new(@data, @space)
|
9
|
+
end
|
10
|
+
it "is a string" do
|
11
|
+
@record.is_a?(String).should be_true
|
12
|
+
end
|
13
|
+
it "chomps itself on creation" do
|
14
|
+
@record.should == "a b c"
|
15
|
+
end
|
16
|
+
it "calculates and array of columns" do
|
17
|
+
@record.cols.should == ["a","b","c"]
|
18
|
+
@record.c.should == @record.cols
|
19
|
+
end
|
20
|
+
it "calculates the number of fields" do
|
21
|
+
@record.nf.should == 3
|
22
|
+
end
|
23
|
+
|
24
|
+
context "accessing columns by name" do
|
25
|
+
before do
|
26
|
+
@record = Record.new("a b c d e f g h i j", @space)
|
27
|
+
end
|
28
|
+
|
29
|
+
{ :first => "a", :second => "b", :third => "c",
|
30
|
+
:fourth => "d", :fifth => "e", :sixth => "f",
|
31
|
+
:seventh => "g", :eighth => "h", :ninth => "i",
|
32
|
+
:tenth => "j" }.each do |method, expected|
|
33
|
+
it "finds the #{method} column value" do
|
34
|
+
@record.send(method).should == expected
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context "with a field separator ','" do
|
40
|
+
it "splits the line into columns using a comma" do
|
41
|
+
record = Record.new("a,b,c d", ",")
|
42
|
+
record.c.should == ["a","b","c d"]
|
43
|
+
end
|
44
|
+
end
|
45
|
+
context "with a regular expression field separator" do
|
46
|
+
it "splits the line into columns using the regular expression" do
|
47
|
+
record = Record.new("a,b|c", /[,|]/)
|
48
|
+
record.c.should == ["a","b","c"]
|
49
|
+
end
|
50
|
+
end
|
51
|
+
context "with an explict end of line marker" do
|
52
|
+
it "chomps the end of line marker" do
|
53
|
+
record = Record.new("a b c.", @space, ".")
|
54
|
+
record.should == "a b c"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rawk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-08-
|
12
|
+
date: 2011-08-16 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! 'An awk-inspired ruby DSL
|
15
15
|
|
@@ -27,8 +27,8 @@ files:
|
|
27
27
|
- bin/rawk
|
28
28
|
- lib/rawk/rawk.rb
|
29
29
|
- rawk.gemspec
|
30
|
-
- spec/rawk/line_spec.rb
|
31
30
|
- spec/rawk/rawk_spec.rb
|
31
|
+
- spec/rawk/record_spec.rb
|
32
32
|
- spec/spec_helper.rb
|
33
33
|
homepage: https://github.com/mowat27/rawk
|
34
34
|
licenses: []
|
@@ -55,6 +55,6 @@ signing_key:
|
|
55
55
|
specification_version: 3
|
56
56
|
summary: An awk-inspired ruby DSL
|
57
57
|
test_files:
|
58
|
-
- spec/rawk/line_spec.rb
|
59
58
|
- spec/rawk/rawk_spec.rb
|
59
|
+
- spec/rawk/record_spec.rb
|
60
60
|
- spec/spec_helper.rb
|
data/spec/rawk/line_spec.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
module Rawk
|
4
|
-
describe Line do
|
5
|
-
before do
|
6
|
-
@fs = " "
|
7
|
-
@data = "a b c\n"
|
8
|
-
@line = Line.new(@data, @fs)
|
9
|
-
end
|
10
|
-
it "is a string" do
|
11
|
-
@line.is_a?(String).should be_true
|
12
|
-
end
|
13
|
-
it "chomps itself on creation" do
|
14
|
-
@line.should == "a b c"
|
15
|
-
end
|
16
|
-
it "finds space-delimited columns by" do
|
17
|
-
@line.cols.should == ["a","b","c"]
|
18
|
-
@line.c.should == @line.cols
|
19
|
-
end
|
20
|
-
it "calculates the number of fields" do
|
21
|
-
@line.nf.should == 3
|
22
|
-
end
|
23
|
-
|
24
|
-
context "with a field separator ','" do
|
25
|
-
it "splits the line into columns using a comma" do
|
26
|
-
line = Line.new("a,b,c d", ",")
|
27
|
-
line.c.should == ["a","b","c d"]
|
28
|
-
end
|
29
|
-
end
|
30
|
-
context "with a regular expression field separator" do
|
31
|
-
it "splits the line into columns using the regular expression" do
|
32
|
-
line = Line.new("a,b|c", /[,|]/)
|
33
|
-
line.c.should == ["a","b","c"]
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|