rbibtex 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,10 +2,10 @@ class BibTeX::Parser
2
2
 
3
3
  rule
4
4
 
5
- file : opt_space_element { result = File.new(Space.new(val[0])) }
5
+ file : opt_space_element { result = BibTeXFile.new(Space.new(val[0])) }
6
6
  | opt_space_element
7
- object_list
8
- opt_space_element { result = File.new(*([val[0]] + val[1] + [val[2]]) ) }
7
+ object_list
8
+ opt_space_element { result = BibTeXFile.new(*([val[0]] + val[1] + [val[2]]) ) }
9
9
  ;
10
10
 
11
11
  object_list : object { result = [val[0]] }
@@ -16,16 +16,24 @@ rule
16
16
  ;
17
17
 
18
18
  at_object : comment { result = val[0] }
19
+ | string { result = val[0] }
19
20
  | entry { result = val[0] }
20
21
  ;
21
22
 
22
- comment : COMMENT opt_space value { result = Comment.new(val[2]) }
23
+ string : STRING opt_space
24
+ LBRACE opt_space
25
+ WORD opt_space
26
+ EQUALS opt_space
27
+ value opt_space
28
+ RBRACE { result = Abbreviation.new(val[4], val[8]) }
29
+
30
+ comment : COMMENT opt_space value_element { result = Comment.new(val[2]) }
23
31
  ;
24
32
 
25
33
  entry : entry_head assignment_list
26
- RBRACE { val[0].properties = val[1]; result = val[0] }
34
+ RBRACE { val[0].merge val[1]; result = val[0] }
27
35
  | entry_head assignment_list /* Allow additional comma after last entry */
28
- COMMA opt_space RBRACE { val[0].properties = val[1]; result = val[0] }
36
+ COMMA opt_space RBRACE { val[0].merge val[1]; result = val[0] }
29
37
  | entry_head RBRACE { result = val[0] } /* Empty entry */
30
38
  ;
31
39
 
@@ -39,49 +47,60 @@ rule
39
47
  ;
40
48
 
41
49
  key_name : { result = "" }
42
- | CHARACTER
43
- | WORD
44
- | key_name CHARACTER { result << val[1] }
45
- | key_name WORD { result << val[1] }
50
+ | key_name_token { result = val[0] }
51
+ | key_name key_name_token { result << val[1] }
46
52
  ;
47
53
 
54
+ key_name_token : WORD
55
+ | CHARACTER
56
+ | number_as_str
57
+ ;
58
+
59
+
48
60
  assignment_list : assignment { result = val[0] }
49
61
  | assignment_list COMMA opt_space
50
62
  assignment { result.merge!(val[3]) }
51
63
  ;
52
64
 
53
65
  assignment : assignment_lhs opt_space
54
- EQUALS opt_space
55
- value opt_space { result = {val[0].downcase.to_sym => val[4]} }
66
+ EQUALS opt_space
67
+ value opt_space { result = {val[0].downcase.to_sym => val[4]} }
56
68
  ;
57
69
 
58
70
  assignment_lhs : WORD
59
71
  ;
60
72
 
61
- value : LBRACE value_content RBRACE { result = val[1] }
62
- | SINGLE_ANFZ value_content_single_del SINGLE_ANFZ { result = val[1] }
63
- | DOUBLE_ANFZ value_content_double_del DOUBLE_ANFZ { result = val[1] }
64
- | WORD
73
+ value : value_element { result = Value.new(val[0]) }
74
+ | value opt_space
75
+ CONCATENATION opt_space
76
+ value { result = val[0] + val[4] }
77
+ ;
78
+
79
+ value_element : LBRACE value_content RBRACE { result = val[1] }
80
+ | SINGLE_QUOTE value_content_single_del SINGLE_QUOTE { result = val[1] }
81
+ | DOUBLE_QUOTE value_content_double_del DOUBLE_QUOTE { result = val[1] }
82
+ | WORD { result = AbbreviationRef.new(val[0]) }
83
+ | NUMBER { result = val[0] }
65
84
  ;
66
85
 
67
86
  brace : LBRACE
68
87
  | RBRACE
69
88
  ;
70
89
 
71
- anfz : SINGLE_ANFZ
72
- | DOUBLE_ANFZ
90
+ anfz : SINGLE_QUOTE
91
+ | DOUBLE_QUOTE
73
92
  ;
74
93
 
75
94
  value_content_single_del : { result = "" }
76
95
  | value_content general_vc_token { result << val[1] }
77
96
  | value_content brace { result << val[1] }
78
- | value_content DOUBLE_ANFZ { result << val[1] }
97
+ | value_content DOUBLE_QUOTE { result << val[1] }
79
98
  ;
80
99
 
81
100
  value_content_double_del : { result = "" }
82
101
  | value_content general_vc_token { result << val[1] }
83
102
  | value_content brace { result << val[1] }
84
- | value_content SINGLE_ANFZ { result << val[1] }
103
+ | value_content SINGLE_QUOTE { result << val[1] }
85
104
  ;
86
105
 
87
106
  value_content : { result = "" }
@@ -93,12 +112,17 @@ rule
93
112
 
94
113
  general_vc_token : SPACE
95
114
  | WORD
115
+ | STRING
96
116
  | COMMA
97
117
  | EQUALS
98
118
  | CHARACTER
119
+ | CONCATENATION
99
120
  | AT
121
+ | number_as_str
100
122
  ;
101
123
 
124
+ number_as_str : NUMBER { result = val[0].to_s }
125
+
102
126
  opt_space_element :
103
127
  | space_element
104
128
  ;
@@ -117,37 +141,7 @@ end
117
141
 
118
142
  ---- header ----
119
143
 
120
- module BibTeX
121
- Entry = Struct.new(:type, :key, :properties)
122
-
123
- class Space < String; end
124
-
125
- class Comment
126
- def initialize(comment)
127
- @comment = comment
128
- end
129
-
130
- def to_s
131
- @comment
132
- end
133
-
134
- alias :to_str :to_s
135
- end
136
-
137
- class File
138
- def initialize(*elements)
139
- @elements = elements.compact
140
- end
141
-
142
- def elements
143
- @elements
144
- end
145
-
146
- def entries
147
- @elements.select { | e | e.is_a?Entry }
148
- end
149
- end
150
- end
144
+ require "rbibtex/types"
151
145
 
152
146
  ---- inner ----
153
147
 
@@ -158,7 +152,7 @@ end
158
152
  def parse( str )
159
153
  @q = []
160
154
 
161
- # @yydebug = true
155
+ #@yydebug = true
162
156
 
163
157
  while str.size > 0 do
164
158
  case str
@@ -166,6 +160,8 @@ end
166
160
  @q.push [:SPACE, $&]
167
161
  when /\Acomment/io
168
162
  @q.push [:COMMENT, $&]
163
+ when /\Astring/io
164
+ @q.push [:STRING, $&]
169
165
  when /\A\,/o
170
166
  @q.push [:COMMA, $&]
171
167
  when /\A@/o
@@ -177,11 +173,15 @@ end
177
173
  when /\A\=/o
178
174
  @q.push [:EQUALS, $&]
179
175
  when /\A\'/o
180
- @q.push [:SINGLE_ANFZ, $&]
176
+ @q.push [:SINGLE_QUOTE, $&]
181
177
  when /\A\"/o
182
- @q.push [:DOUBLE_ANFZ, $&]
178
+ @q.push [:DOUBLE_QUOTE, $&]
179
+ when /\A\d+\b/o
180
+ @q.push [:NUMBER, $&.to_i]
183
181
  when /\A\w+/o
184
182
  @q.push [:WORD, $&]
183
+ when /\A#/o
184
+ @q.push [:CONCATENATION, $&]
185
185
  when /\A./o
186
186
  @q.push [:CHARACTER, $&]
187
187
  when /\A\d+/o
@@ -199,4 +199,8 @@ end
199
199
  end
200
200
 
201
201
 
202
# Error hook of the generated parser: dumps the raw arguments it was called
# with for debugging, then hands over to the inherited implementation.
#
# The dump is written to standard error rather than standard output so that
# it cannot get mixed into data a tool writes to standard output.
#
# args:: whatever the parser runtime passes to the hook; forwarded unchanged
#        to the superclass implementation.
def on_error(*args)
  $stderr.puts args.inspect
  super
end
202
206
  ---- footer ----
@@ -0,0 +1,64 @@
1
+ # Some string handling routines for wordwrapping and tabifying.
2
class String
  # Word-wrap every line of the string to at most +width+ characters.
  #
  # Each input line is handled separately: its leading whitespace is taken
  # off, the remainder is broken at whitespace boundaries, and the original
  # leading whitespace is put back in front of every resulting sub-line.
  # The indentation itself is not counted against +width+.
  # Will not work correctly if the string contains tab characters.
  #
  # Raises RuntimeError ("Invalid width") when +width+ is negative.
  def wrap(width = 78)
    raise "Invalid width" if width < 0
    self.split("\n", -1).map { |line|
      indent = line[/\A\s*/]
      content = line[/\A\s*(.*)\Z/, 1]
      sublines = ["".dup]
      # The lookahead split keeps the separating whitespace at the front of
      # each token, so appending tokens reproduces the original spacing.
      content.split(/(?=\s+)/).each do |token|
        if sublines[-1].length + token.length <= width
          sublines[-1] << token
        else
          # A new sub-line starts without the whitespace that separated it.
          sublines << token[/\A\s*(.*)\Z/, 1]
        end
      end
      # The initial empty sub-line stays empty when the very first token is
      # already too long (or the line is empty); drop it.
      sublines.shift if sublines.first == ""
      sublines.map { |subline| indent + subline }
    }.join("\n")
  end

  # Indent everything but the first line by +indent+ spaces.
  # Returns "" for an empty string.
  def indent_hanging(indent = 2)
    first, rest = *self.split("\n", 2)
    return "" unless first
    if rest
      first + "\n" + (rest.indent(indent))
    else
      first
    end
  end

  # Indent every line by +indent+ spaces.
  def indent(indent = 2)
    self.gsub(/^/, " " * indent)
  end

  # Substitute every tab with +tab_width+ spaces.
  def untabify(tab_width = 8)
    self.gsub(/\t/, " " * tab_width)
  end

  # Deindent, determining the amount from every line except the first.
  # Will not work correctly if the string contains tab characters.
  #
  # The default amount is the smallest leading-whitespace width of the
  # non-blank lines that follow a newline. Only horizontal whitespace is
  # measured, so blank lines do not inflate the result. When there is no
  # such line the default is nil, which makes #deindent strip all leading
  # spaces.
  def deindent_hanging(indent = self.scan(/\n([ \t]*)(?=\S)/).flatten.map { |e| e.length }.min)
    deindent(indent)
  end

  # Remove whitespace in front of every line end.
  def remove_trailing_whitespace
    self.gsub(/\s+$/, "")
  end

  # Remove indentation. The optional argument gives the number of spaces to
  # remove from the start of each line (fewer are removed from lines that
  # have fewer). Will not work correctly if the string contains tab
  # characters.
  #
  # The default amount is the smallest leading-whitespace width over all
  # non-blank lines; blank lines are ignored. When there is no non-blank
  # line the default is nil and all leading spaces are removed.
  def deindent(indent = self.scan(/^[ \t]*(?=\S)/).map { |e| e.length }.min)
    self.gsub(/^ {0,#{indent}}/, "")
  end

  # Change paragraphs (separated by one or more blank lines) into one
  # liners. Lines that start with whitespace (indented text) are left as is.
  def unwrap
    self.sub(/\A\n+/i, '').split(/\n(?:\s*\n)+/).map { |paragraph|
      paragraph.gsub(/((?:\n|^)\S[^\n]*)\n(?=\S)/, "\\1 ")
    }.join("\n\n")
  end
end
64
+
@@ -0,0 +1,75 @@
1
module BibTeX

  # Contains modules that can be included in OptionParser parsers and create
  # switches there. Be sure to call super in the initialize methods.
  module Options

    # Module to be included into an OptionParser parser. Creates output, help
    # and version switches.
    #
    # The version switch prints the VERSION constant of the parser class
    # (including constants it inherits). When the parser class has none, the
    # constant is looked up from this module's own scope instead.
    module ApplicationOptions
      # IO object the application should write to; STDOUT unless -o was given.
      attr_reader :output

      def initialize
        super()

        @output = STDOUT
        self.on("-o", "--output FILENAME", String,
                "Set the output filename. Writes to stdout if nothing is given") { |v| @output = File.open(v, 'w') }
        self.separator ""
        self.on("-?", "--help",
                "Show this help") { puts self; exit }
        self.on("-v", "--version",
                "Output version number and exit") {
          # A bare VERSION here is resolved lexically (this module and its
          # enclosing modules), which never reaches the including parser
          # class, so ask the parser class first.
          parser_class = self.class
          puts(parser_class.const_defined?(:VERSION) ? parser_class.const_get(:VERSION) : VERSION)
          exit
        }
      end
    end

    # Module to be included into an OptionParser parser. Creates sort by switch.
    module SortOptions
      # Array of downcased field-name symbols given with -s, or nil.
      attr_reader :sort_by

      def initialize
        super()

        self.separator ""
        self.separator "Sorting of entries"
        self.on("-s", "--sort-by", "--sort_by Field_1,Field2,...", Array,
                "Specify fields that shall be used for sorting of the file") { |v| @sort_by = v.map { |e| e.downcase.to_sym } }
      end

      # Parses like the inherited parse! and returns its result (the original
      # override lost it and returned nil).
      #
      # Raises RuntimeError when the sort switch was given an empty list.
      def parse!(*args)
        remaining = super
        raise "You can't specify an empty list of fields for sorting" if @sort_by and @sort_by.empty?
        remaining
      end
    end

    # Module to be included in an OptionParser parser. Creates top_fields and
    # bottom_fields switches.
    module SortFieldsOptions
      # Arrays of downcased field-name symbols. A field named in one switch
      # is removed from the other list.
      attr_reader :top_fields, :bottom_fields

      def initialize
        super()

        @top_fields = [:author, :title, :year]
        @bottom_fields = [:note, :pdf, :ps, :www]

        self.separator ""
        self.separator "Sorting of fields"
        self.on("-t", "--top-fields", "--top_fields Field_1,Field2,...", Array,
                "Specify a list of fields that are written first in the output") { |v|
          @top_fields = v.map { |e| e.downcase.to_sym }
          @bottom_fields -= @top_fields
        }
        self.on("-b", "--bottom-fields", "--bottom_fields Field_1,Field2,...", Array,
                "Specify a list of fields that are written last in the output") { |v|
          @bottom_fields = v.map { |e| e.downcase.to_sym }
          @top_fields -= @bottom_fields
        }
      end

    end

  end
end
@@ -0,0 +1,344 @@
1
+ # TODO: Do not modify the String base class. (I want selector namespaces :-( )
2
+ require "rbibtex/string"
3
+
4
+ module BibTeX
5
# A single entry with a type, a key and a set of named properties.
#
# Property names are case-insensitive: they are normalized to downcased
# symbols on every access. Properties can be read and written with [] / []=
# or with dynamic accessors (entry.title, entry.title = value). The names
# :type and :key address the entry's own type and key.
class Entry
  include Enumerable

  attr_accessor :type, :key

  # Normalize a property name to a downcased symbol.
  def self.norm_key(key)
    key.to_s.downcase.to_sym
  end

  # Display capitalization of property names. Names without an explicit
  # entry are capitalized; lookups with a non-normalized name are redirected
  # to the normalized name.
  CAPITALIZATION = Hash.new { |h, k|
    if k == norm_key(k)
      k.to_s.capitalize
    else
      h[norm_key(k)]
    end
  }
  CAPITALIZATION.merge!({
    :additional => "additional",
    :booktitle => "BookTitle",
    :isbn => "isbn",
    :issn => "issn",
    :pdf => "pdf",
    :ps => "ps"
  })

  # type::       entry type
  # key::        entry key
  # properties:: hash of property name => value; names are normalized
  def initialize(type, key, properties = {})
    @properties = properties.inject({}) { |r, (k, v)| r[norm_key(k)] = v; r }
    @key = key
    @type = type
  end

  # Dynamic accessors: a call without arguments reads an existing property,
  # a call of the form name=(value) writes a property. Everything else is
  # passed on to the inherited handler.
  def method_missing(method, *args)
    case args.length
    when 0
      if has_key?(method)
        return self[method]
      else
        super
      end
    when 1
      if /\A(.*)=\Z/ =~ method.to_s
        self[$1] = args[0]
      else
        super
      end
    else
      super
    end
  end

  # Keeps respond_to? in line with method_missing: every setter name and
  # the name of every existing property is answered with true.
  #
  # The previous respond_to? override branched on the number of extra
  # arguments, i.e. on the include_private flag, not on the kind of method
  # asked about, so respond_to?(:title=) was false although the call worked.
  def respond_to_missing?(method, include_private = false)
    return true if /\A(.*)=\Z/ =~ method.to_s
    has_key?(method) || super
  end

  # Return a new entry of the same type and key in which every element of
  # every property value has been normalized:
  # - abbreviation references are kept as they are,
  # - strings consisting only of digits become integers (always read as
  #   decimal, so "08" is 8 and "010" is 10),
  # - other strings are deindented and unwrapped,
  # - anything else is kept as it is.
  def normalize
    normalized = @properties.inject({}) { |r, (k, v)|
      r[k] = v.map { |element| normalize_element(element) }
      r
    }
    self.class.new(self.type, self.key, normalized)
  end

  # True if a property with this (case-insensitive) name exists.
  def has_key?(key)
    @properties.has_key?(norm_key(key))
  end

  # Names of all properties plus :type and :key.
  def keys
    @properties.keys + [:type, :key]
  end

  # Set every name/value pair of +hash+ on this entry.
  def merge(hash)
    hash.each do |(k, v)|
      self[k] = v
    end
  end

  # Iterate over the property name/value pairs (type and key excluded).
  def each(&block)
    @properties.each(&block)
  end

  # Read a property; :key and :type return the entry's key and type.
  def [](key)
    case norm_key(key)
    when :key
      @key
    when :type
      @type
    else
      @properties[norm_key(key)]
    end
  end

  # Write a property; :key and :type set the entry's key and type.
  def []=(key, value)
    case norm_key(key)
    when :key
      @key = value
    when :type
      @type = value
    else
      @properties[norm_key(key)] = value
    end
  end

  private

  def norm_key(*args)
    self.class.norm_key(*args)
  end

  # Normalize one element of a property value, see #normalize.
  def normalize_element(element)
    return element if element.is_a?(AbbreviationRef)
    return element unless element.is_a?(String)

    if /\A\d+\Z/ =~ element
      # Integer("08") raises and Integer("010") is 8 because a leading zero
      # selects octal; to_i always reads decimal.
      element.to_i
    elsif element.respond_to?(:deindent_hanging)
      element.deindent_hanging.unwrap
    else
      element
    end
  end
end
140
+
141
# Marker type for whitespace: a String subclass without behavior of its own,
# so that whitespace can be told apart from other strings by class.
class Space < String
end
142
+
143
# A comment, stored as its text (String subclass).
class Comment < String
  # Return a new Comment holding this text after deindent_hanging and unwrap
  # have been applied to it, in that order.
  def normalize
    tidied = deindent_hanging.unwrap
    Comment.new(tidied)
  end
end
148
+
149
# A value made of a sequence of elements (strings, numbers, abbreviation
# references, nested values ...). In bibtex notation the elements are
# joined with " # ".
class Value
  include Enumerable

  attr_reader :elements

  def initialize(*elements)
    @elements = elements
  end

  # Concatenation of the string forms of all elements.
  def to_s
    @elements.map { |e| e.to_s }.join("")
  end

  # New value with the elements of +o+ appended to the elements of this one.
  def +(o)
    self.class.new(*(@elements + o.elements))
  end

  # Iterate over the elements; returns self.
  def each(&block)
    @elements.each(&block)
    self
  end

  # Replace each element in place by the result of the block.
  def map!(&block)
    @elements.map!(&block)
  end

  # Create a new Value object where each element is replaced by its map'ed
  # element.
  def map(&block)
    Value.new(*@elements.map(&block))
  end

  # Bibtex notation of the value: numbers are written bare, elements with a
  # to_bib method render themselves, string-like elements are wrapped in
  # braces. Raises RuntimeError for any other element.
  def to_bib
    @elements.map { |ve|
      if ve.is_a?(Numeric)
        ve.to_s
      elsif ve.respond_to?(:to_bib)
        ve.to_bib
      elsif ve.respond_to?(:to_str)
        %({#{ve.to_str}})
      else
        raise "Don't know how to display #{ve.inspect} as a bibtex element"
      end
    }.join(" # ")
  end

  # Create a flat version, where each element that is itself a value is
  # included directly.
  def flatten
    self.class.new(
      *@elements.map { |e| e.respond_to?(:flatten) ? e.flatten : e }.
        map { |e| e.respond_to?(:elements) ? e.elements : e }.
        flatten
    )
  end

  # Create a normalized version: the value is flattened and literal strings
  # following each other are joined together.
  #
  # Abbreviation references are strings by class but stand for another
  # text, so they are never joined with their neighbours; joining would turn
  # the reference into literal text.
  def normalize
    merged = self.flatten.elements.inject([]) { |r, e|
      if literal_string?(r[-1]) and literal_string?(e)
        r[-1] += e
      else
        r << e
      end
      r
    }
    self.class.new(*merged)
  end

  private

  # True for strings that are literal text, i.e. not abbreviation references.
  def literal_string?(element)
    element.is_a?(String) && !element.is_a?(AbbreviationRef)
  end
end
221
+
222
# Reference to an abbreviation by name. It is a String subclass so the name
# can be used wherever a string is expected, while the class tells it apart
# from literal text.
class AbbreviationRef < String
  # Bibtex notation of the reference: the bare name. Without this method a
  # renderer that falls back to to_str for strings would wrap the name in
  # braces and thereby turn the reference into literal text.
  def to_bib
    String.new(self)
  end
end
223
# Definition of an abbreviation: a key and the value it stands for.
class Abbreviation
  # key::    name of the abbreviation
  # string:: the value the abbreviation stands for
  attr_accessor :key, :string

  def initialize(key, string)
    @key, @string = key, string
  end

  # Text the abbreviation stands for, always as a String. (Returning the
  # stored value object itself breaks to_s/to_str whenever that object is
  # not a String.)
  def to_s
    @string.to_s
  end

  # Bibtex notation when used inside a value: a copy of the key.
  def to_bib
    @key.dup
  end

  # New abbreviation with the same key in which every String element of the
  # value has been deindented and unwrapped.
  def normalize
    self.class.new(@key, @string.map { |e| e.is_a?(String) ? e.deindent_hanging.unwrap : e })
  end

  alias :to_str :to_s
end
244
+
245
# The contents of a bibtex file: an ordered list of elements (entries,
# abbreviations, comments, whitespace ...).
class BibTeXFile
  include Enumerable

  attr_reader :elements

  # Create a file from a string.
  def self.parse(string)
    Parser.new.parse(string)
  end

  # nil elements are dropped.
  def initialize(*elements)
    @elements = elements.compact
  end

  # Iterate over all elements.
  def each(&block)
    @elements.each(&block)
  end

  # Return all entries in this file.
  def entries
    @elements.select { |e| e.is_a?(Entry) }
  end

  # Normalizes the file (see #normalize!, this discards whitespace) and
  # sorts its elements in place: abbreviations first (by key), then
  # comments (by text), then entries (by the string forms of the given
  # keys, missing properties counting as ""), then everything else.
  #
  # Raises RuntimeError when both a block and keys are given.
  # NOTE(review): a block passed on its own is accepted but not used for
  # the sort order - confirm the intended block semantics before relying
  # on it.
  def sort_by(*keys)
    raise "Give either a block or keys to sort on" if block_given? and !keys.empty?
    self.normalize!
    @elements = @elements.sort_by { |element|
      case element
      when Abbreviation
        [0, element.key]
      when Comment
        [1, element.to_s]
      when Entry
        [2, keys.map { |k| (element[k] || "").to_s }]
      else
        [10]
      end
    }
  end

  # Return an array with the abbreviations in this file.
  def abbreviations
    @elements.select { |element| element.is_a?(Abbreviation) }
  end

  # Get an abbreviation by key (case-insensitive); nil if there is none.
  def abbreviation(key)
    wanted = norm_key(key)
    abbreviations.find { |a| norm_key(a.key) == wanted }
  end

  # Check if abbreviation is defined.
  def has_abbreviation?(key)
    abbreviation(key) != nil
  end

  # - Remove whitespace elements from the element list
  # - Replace every element that can be normalized by its normalized form
  #
  # Elements without a normalize method are kept unchanged (they used to be
  # replaced by nil).
  def normalize!
    @elements = @elements.reject { |e| e.is_a?(Space) }
    @elements.map! { |e| e.respond_to?(:normalize) ? e.normalize : e }
  end

  # Replace abbreviation references by the value of the abbreviation they
  # name. (Does not replace predefined abbreviations like jan, feb if they
  # are not explicitly specified in this bibtex file.)
  def interpolate!
    self.entries.each do |entry|
      entry.each do |(k, v)|
        interpolated = v.map { |e|
          if e.is_a?(AbbreviationRef) and has_abbreviation?(e)
            abbreviation(e).string
          else
            e
          end
        }
        shrunk = interpolated.normalize
        entry[k] = Value.new(*shrunk)
      end
    end
  end

  private

  # Normalize a key to a downcased symbol.
  def norm_key(key)
    key.to_s.downcase.to_sym
  end
end
342
+ end
343
+
344
+
data/lib/rbibtex.rb CHANGED
@@ -1 +1,8 @@
1
1
  require "rbibtex/rbibtex.tab"
2
+
3
module BibTeX
  # Convenience function: feeds the string to a freshly created Parser and
  # returns whatever the parser's parse method returns.
  def self.parse(string)
    parser = Parser.new
    parser.parse(string)
  end
end