rawk 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -10,9 +10,13 @@ Obviously, you can use ruby -e and {other magic}[http://code.joejag.com/2009/usi
10
10
 
11
11
  == Install
12
12
 
13
- Clone the git repo or download the file as a .tar or .zip. The bin directory contains the rawk executable. Does not require any gems (except rspec if you want to run the tests)
13
+ From the command line
14
14
 
15
- I will package rawk as a gem when I find a spare moment.
15
+ gem install rawk
16
+
17
+ Using bundler
18
+
19
+ gem "rawk", "~> 0.1.2"
16
20
 
17
21
  == Example
18
22
 
@@ -35,7 +39,7 @@ Creates the following output
35
39
 
36
40
  This can be written using rawk as
37
41
 
38
- $ ls -ltr | bin/rawk '
42
+ $ ls -ltr | rawk '
39
43
  start {puts "Starting..."}
40
44
  every {|record| puts "#{record.cols[8]} #{record.cols[0]}"}
41
45
  finish {puts "done"} '
@@ -64,7 +68,7 @@ Runs before any lines are read from the input stream. Equivalent to a BEGIN con
64
68
 
65
69
  every {|record| <code>}
66
70
 
67
- Runs once for each line of input data. Yields an object of type Line (see below)
71
+ Runs once for each line of input data. Yields an object of type Record (see below)
68
72
  Equivalent to an anonymous block such as awk '{print $1}'
69
73
 
70
74
  finish {<code>}
@@ -74,7 +78,7 @@ Equivalent to an END condition in awk
74
78
 
75
79
  You can provide multiple blocks of code for each condition.
76
80
 
77
- ls -ltr | head -2 | bin/rawk '
81
+ ls -ltr | head -2 | rawk '
78
82
  every {|record| puts 1}
79
83
  every {|record| puts 2} '
80
84
 
@@ -89,11 +93,11 @@ prints
89
93
 
90
94
  * Conditional blocks
91
95
 
92
- == Lines
96
+ == Records
93
97
 
94
- every yields an object of type Line which is subclass of String that adds a cols method to access columns. The cols method returns an array of column values.
98
+ every yields an object of type Record, which is a subclass of String that adds a cols method to access columns. The cols method returns an array of column values.
95
99
 
96
- echo "hello world" | bin/rawk 'every do |record|
100
+ echo "hello world" | rawk 'every do |record|
97
101
  puts "#{record.cols.length} columns: #{record.cols.join(",")}"
98
102
  end'
99
103
 
@@ -101,19 +105,25 @@ every yields an object of type Line which is subclass of String that adds a cols
101
105
 
102
106
  Note that cols is aliased to c for convenience
103
107
 
104
- echo "hello world" | bin/rawk 'every do |record|
108
+ echo "hello world" | rawk 'every do |record|
105
109
  puts record.c[0]
106
110
  end'
107
111
 
108
112
  -> hello
113
+
114
+ In most cases you will be dealing with only a few columns of data, so Record provides methods that let you access the first 10 columns directly by position name.
109
115
 
116
+ echo hello world from me | rawk 'every {|r| puts "#{r.first} #{r.third}"}'
117
+
118
+ -> hello from
119
+
110
120
  == Functions, classes and other ruby stuff
111
121
 
112
- You can use ruby as normal before specifying any blocks. For example...
122
+ You can use ruby as normal. For example...
113
123
 
114
124
  Functions
115
125
 
116
- echo hello world | bin/rawk '
126
+ echo hello world | rawk '
117
127
  def print_first_column(record)
118
128
  puts record.cols.first
119
129
  end
@@ -121,7 +131,7 @@ Functions
121
131
 
122
132
  Classes
123
133
 
124
- echo hello world | bin/rawk '
134
+ echo hello world | rawk '
125
135
  class Printer
126
136
  def self.print_first(record)
127
137
  puts record.cols.first
@@ -133,28 +143,63 @@ Requires and gems
133
143
 
134
144
  require works as you would expect although rubygems is not required by default.
135
145
 
136
- echo "ruby" | bin/rawk '
146
+ echo "ruby" | rawk '
137
147
  require "rubygems"
138
148
  require "active_support/all"
139
149
  every {|record| puts record.cols.first.pluralize} '
140
150
 
141
151
  -> rubies
142
152
 
153
+ == Variables and Scope
154
+
155
+ Variables defined inside the condition blocks (start, every, finish) are local to that block. Create member variables to share state between blocks.
156
+
157
+ ls | tail -2 | rawk '
158
+ start do
159
+ local = "foo"
160
+ @shared = "bar"
161
+ puts "Starting with #{local}"
162
+ end
163
+ every {|record| puts "Running with #{@shared}"} '
164
+
165
+ -> Starting with foo
166
+ Running with bar
167
+ Running with bar
168
+
143
169
  == Builtins
144
170
 
145
171
  rawk provides builtins as member variables. You can change them as you see fit.
146
172
 
147
173
  @nr holds the current record number
148
- ls -ltr | head -2 | bin/rawk 'every {puts @nr}'
174
+ ls -ltr | head -2 | rawk 'every {puts @nr}'
149
175
 
150
176
  @fs specifies the field separator applied to each record
151
177
 
152
- echo "foo.bar" | bin/rawk '
178
+ echo "foo.bar" | rawk '
153
179
  start {@fs="."}
154
180
  every {|record| puts "1: #{record.cols[0]} 2: #{record.cols[1]}"} '
155
181
 
156
182
  -> 1: foo 2: bar
157
183
 
184
+ @rs specifies the "record separator" character
185
+ * Defaults to newline
186
+ * Note that, unlike awk, @rs can only be set in the start block. It cannot be changed "in flight"
187
+
188
+ ksh print -n "foo.bar." | rawk '
189
+ start {@rs = "."}
190
+ every {|r| puts r.cols.first} '
191
+
192
+ -> foo
193
+ bar
194
+
195
+ NF: Keeps a count of the number of fields in an input record. The last field in the input record can be designated by $NF.
196
+ * Each Record yielded by the every block has a '.nf' method
197
+ * $NF can be coded as 'every {|record| record.cols.last}'
198
+
199
+ echo "foo bar" | rawk 'every {|record| puts "#{record.nf} fields"}'
200
+
201
+ -> 2 fields
202
+
158
203
  === Not supported (yet)
159
204
 
160
205
  I'm working on support for the following awk built-ins
@@ -164,21 +209,11 @@ Contains the name of the current input-file.
164
209
  * Reading input data is not supported yet
165
210
  * When I add it, I'll add @filename as a member
166
211
 
167
- RS:
168
- Stores the current "record separator" character. Since, by default, an input line is the input record, the default record separator character is a "newline".
169
- * Will be @rs
170
- * Currently, records are delimited by newline
171
-
172
212
 
173
213
  === Redundant
174
214
 
175
215
  The following awk built-ins are redundant in ruby
176
216
 
177
- NF:
178
- Keeps a count of the number of fields in an input record. The last field in the input record can be designated by $NF.
179
- * NF can be coded as 'every {|record| record.cols.size}'
180
- * $NF can be coded as 'every {|record| record.cols.last}'
181
-
182
217
  OFS:
183
218
  Stores the "output field separator", which separates the fields when Awk prints them. The default is a "space" character.
184
219
  * Ruby's string handling is far superior to awk's so there is no point in implementing a print routine
@@ -192,4 +227,15 @@ OFMT: Stores the format for numeric output. The default format is "%.6g".
192
227
 
193
228
  == Using rawk inside a ruby program
194
229
 
195
- Technically, you can require rawk and use it's classes directly. However, it will be messy until I add gem packaging so I'll wait until you can install rawk as a gem before I go ahead an document it.
230
+ Rawk code is evaluated within an instance of Rawk::Program. You can use rawk within your programs as follows...
231
+
232
+ require 'rubygems'
233
+ require 'rawk'
234
+
235
+ data = "foo\nbar"
236
+ program = Rawk::Program.new(data)
237
+
238
+ program.run do
239
+ every {|record| puts record.cols.first}
240
+ end
241
+
data/bin/rawk CHANGED
@@ -5,7 +5,8 @@ $: << File.join(PROJECT_DIR, 'lib')
5
5
 
6
6
  require 'rawk/rawk'
7
7
 
8
- DATA = STDIN
9
- DSL_CODE = ARGV[0]
10
-
11
- Rawk::Program.new(DATA).run(DSL_CODE)
8
+ module Rawk
9
+ DATA = STDIN
10
+ DSL_CODE = ARGV[0]
11
+ Program.new(DATA).run(DSL_CODE) if DSL_CODE
12
+ end
data/lib/rawk/rawk.rb CHANGED
@@ -3,26 +3,27 @@ require 'observer'
3
3
 
4
4
  module Rawk
5
5
  class Program
6
- attr_reader :fs
6
+ attr_reader :fs, :rs
7
7
 
8
8
  def initialize(io)
9
9
  @start, @every, @finish = Set.new, Set.new, Set.new
10
+ initialize_builtins!
10
11
  @input_stream = InputStream.new(io)
11
12
  @input_stream.add_observer(self)
12
- initialize_builtins!
13
13
  end
14
14
 
15
15
  private
16
16
  def initialize_builtins!
17
17
  @fs = " "
18
18
  @nr = 0
19
+ @rs = "\n"
19
20
  end
20
21
 
21
22
  public
22
23
  def on_new_line
23
24
  @nr += 1
24
25
  end
25
- alias :update :on_new_line
26
+ alias :update :on_new_line # required by Observer
26
27
 
27
28
  def run(code = "", &block)
28
29
  load!(code, &block)
@@ -53,8 +54,9 @@ module Rawk
53
54
 
54
55
  def execute_code!
55
56
  @start.each {|b| b.call}
56
- @input_stream.each_line do |row|
57
- @every.each {|b| b.call(Line.new(row, @fs))}
57
+ @input_stream.each_line(@rs) do |str|
58
+ record = Record.new(str, @fs, @rs)
59
+ @every.each {|b| b.call(record)}
58
60
  end
59
61
  @finish.each {|b| b.call}
60
62
  end
@@ -67,8 +69,8 @@ module Rawk
67
69
  @io = io
68
70
  end
69
71
 
70
- def each_line
71
- @io.each_line do |line|
72
+ def each_line(separator)
73
+ @io.each_line(separator) do |line|
72
74
  changed
73
75
  notify_observers
74
76
  yield line
@@ -76,9 +78,15 @@ module Rawk
76
78
  end
77
79
  end
78
80
 
79
- class Line < String
80
- def initialize(str, fs)
81
- self.replace(str.chomp)
81
+ class Record < String
82
+ def self.column_postion_accessor(name, position)
83
+ define_method(name.to_sym) do
84
+ cols[position]
85
+ end
86
+ end
87
+
88
+ def initialize(str, fs, eor = "\n")
89
+ self.replace(str.chomp(eor))
82
90
  @fs = fs
83
91
  end
84
92
 
@@ -90,5 +98,11 @@ module Rawk
90
98
  def nf
91
99
  cols.length
92
100
  end
101
+
102
+ {:first => 0,:second => 1, :third => 2, :fourth => 3,:fifth => 4,
103
+ :sixth => 5,:seventh => 6,:eighth => 7,:ninth => 8, :tenth => 9
104
+ }.each do |name, column_position|
105
+ column_postion_accessor name, column_position
106
+ end
93
107
  end
94
108
  end
data/rawk.gemspec CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
2
2
 
3
3
  SPEC = Gem::Specification.new do |s|
4
4
  s.name = "rawk"
5
- s.version = '0.1.2'
5
+ s.version = '0.1.3'
6
6
  s.author = "Adrian Mowat"
7
7
  s.homepage = "https://github.com/mowat27/rawk"
8
8
  s.summary = "An awk-inspired ruby DSL"
@@ -113,34 +113,56 @@ module Rawk
113
113
  end
114
114
 
115
115
  describe "support for standard awk built-in variables" do
116
- it "calculates the current record num as nr" do
117
- record_nums = []
118
- @program.run do
119
- start {record_nums << @nr}
120
- every {record_nums << @nr}
121
- finish {record_nums << @nr}
116
+ describe "@nr (current record)" do
117
+ it "holds the current record number" do
118
+ record_nums = []
119
+ @program.run do
120
+ start {record_nums << @nr}
121
+ every {record_nums << @nr}
122
+ finish {record_nums << @nr}
123
+ end
124
+ record_nums.should == [0,1,2,3,3]
122
125
  end
123
- record_nums.should == [0,1,2,3,3]
124
126
  end
125
127
 
126
- describe "fs" do
128
+ describe "@fs (field separator)" do
127
129
  it "holds the current field separator expression" do
128
130
  @program.fs.should == " "
129
131
  end
130
132
  it "is applied to each line of data" do
131
133
  data = "line"
132
- Line.should_receive(:new).with(data, " ")
134
+ Record.should_receive(:new).with(data, " ", "\n")
133
135
  Program.new(data).run {every {|l| nil}}
134
136
  end
135
137
  it "can be changed by the user's program" do
136
138
  data = "line"
137
- Line.should_receive(:new).with(data, ",").and_return("dummy")
139
+ Record.should_receive(:new).with(data, ",", "\n").and_return("dummy")
138
140
  Program.new(data).run do
139
141
  start {@fs = ','}
140
142
  every {|l| nil}
141
143
  end
142
144
  end
143
145
  end
146
+
147
+ describe "@rs (record separator)" do
148
+ it "defaults to a newline" do
149
+ @program.rs.should == "\n"
150
+ end
151
+ it "can be changed by the user" do
152
+ data = "line1.line2."
153
+ Record.should_receive(:new).
154
+ with("line1.", " ", ".").once.ordered.
155
+ and_return("")
156
+ Record.should_receive(:new).
157
+ with("line2.", " ", ".").once.ordered.
158
+ and_return("")
159
+
160
+ Program.new(data).run do
161
+ start {@rs = "."}
162
+ every {|rec| nil}
163
+ end
164
+ end
165
+ end
144
166
  end
145
167
  end
146
168
  end
@@ -0,0 +1,58 @@
1
+ require 'spec_helper'
2
+
3
+ module Rawk
4
+ describe Record do
5
+ before do
6
+ @space = " "
7
+ @data = "a b c\n"
8
+ @record = Record.new(@data, @space)
9
+ end
10
+ it "is a string" do
11
+ @record.is_a?(String).should be_true
12
+ end
13
+ it "chomps itself on creation" do
14
+ @record.should == "a b c"
15
+ end
16
+ it "calculates and array of columns" do
17
+ @record.cols.should == ["a","b","c"]
18
+ @record.c.should == @record.cols
19
+ end
20
+ it "calculates the number of fields" do
21
+ @record.nf.should == 3
22
+ end
23
+
24
+ context "accessing columns by name" do
25
+ before do
26
+ @record = Record.new("a b c d e f g h i j", @space)
27
+ end
28
+
29
+ { :first => "a", :second => "b", :third => "c",
30
+ :fourth => "d", :fifth => "e", :sixth => "f",
31
+ :seventh => "g", :eighth => "h", :ninth => "i",
32
+ :tenth => "j" }.each do |method, expected|
33
+ it "finds the #{method} column value" do
34
+ @record.send(method).should == expected
35
+ end
36
+ end
37
+ end
38
+
39
+ context "with a field separator ','" do
40
+ it "splits the line into columns using a comma" do
41
+ record = Record.new("a,b,c d", ",")
42
+ record.c.should == ["a","b","c d"]
43
+ end
44
+ end
45
+ context "with a regular expression field separator" do
46
+ it "splits the line into columns using the regular expression" do
47
+ record = Record.new("a,b|c", /[,|]/)
48
+ record.c.should == ["a","b","c"]
49
+ end
50
+ end
51
+ context "with an explict end of line marker" do
52
+ it "chomps the end of line marker" do
53
+ record = Record.new("a b c.", @space, ".")
54
+ record.should == "a b c"
55
+ end
56
+ end
57
+ end
58
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rawk
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-08-14 00:00:00.000000000Z
12
+ date: 2011-08-16 00:00:00.000000000Z
13
13
  dependencies: []
14
14
  description: ! 'An awk-inspired ruby DSL
15
15
 
@@ -27,8 +27,8 @@ files:
27
27
  - bin/rawk
28
28
  - lib/rawk/rawk.rb
29
29
  - rawk.gemspec
30
- - spec/rawk/line_spec.rb
31
30
  - spec/rawk/rawk_spec.rb
31
+ - spec/rawk/record_spec.rb
32
32
  - spec/spec_helper.rb
33
33
  homepage: https://github.com/mowat27/rawk
34
34
  licenses: []
@@ -55,6 +55,6 @@ signing_key:
55
55
  specification_version: 3
56
56
  summary: An awk-inspired ruby DSL
57
57
  test_files:
58
- - spec/rawk/line_spec.rb
59
58
  - spec/rawk/rawk_spec.rb
59
+ - spec/rawk/record_spec.rb
60
60
  - spec/spec_helper.rb
@@ -1,37 +0,0 @@
1
- require 'spec_helper'
2
-
3
- module Rawk
4
- describe Line do
5
- before do
6
- @fs = " "
7
- @data = "a b c\n"
8
- @line = Line.new(@data, @fs)
9
- end
10
- it "is a string" do
11
- @line.is_a?(String).should be_true
12
- end
13
- it "chomps itself on creation" do
14
- @line.should == "a b c"
15
- end
16
- it "finds space-delimited columns by" do
17
- @line.cols.should == ["a","b","c"]
18
- @line.c.should == @line.cols
19
- end
20
- it "calculates the number of fields" do
21
- @line.nf.should == 3
22
- end
23
-
24
- context "with a field separator ','" do
25
- it "splits the line into columns using a comma" do
26
- line = Line.new("a,b,c d", ",")
27
- line.c.should == ["a","b","c d"]
28
- end
29
- end
30
- context "with a regular expression field separator" do
31
- it "splits the line into columns using the regular expression" do
32
- line = Line.new("a,b|c", /[,|]/)
33
- line.c.should == ["a","b","c"]
34
- end
35
- end
36
- end
37
- end