rbibtex 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,10 +2,10 @@ class BibTeX::Parser
2
2
 
3
3
  rule
4
4
 
5
- file : opt_space_element { result = File.new(Space.new(val[0])) }
5
+ file : opt_space_element { result = BibTeXFile.new(Space.new(val[0])) }
6
6
  | opt_space_element
7
- object_list
8
- opt_space_element { result = File.new(*([val[0]] + val[1] + [val[2]]) ) }
7
+ object_list
8
+ opt_space_element { result = BibTeXFile.new(*([val[0]] + val[1] + [val[2]]) ) }
9
9
  ;
10
10
 
11
11
  object_list : object { result = [val[0]] }
@@ -16,16 +16,24 @@ rule
16
16
  ;
17
17
 
18
18
  at_object : comment { result = val[0] }
19
+ | string { result = val[0] }
19
20
  | entry { result = val[0] }
20
21
  ;
21
22
 
22
- comment : COMMENT opt_space value { result = Comment.new(val[2]) }
23
+ string : STRING opt_space
24
+ LBRACE opt_space
25
+ WORD opt_space
26
+ EQUALS opt_space
27
+ value opt_space
28
+ RBRACE { result = Abbreviation.new(val[4], val[8]) }
29
+
30
+ comment : COMMENT opt_space value_element { result = Comment.new(val[2]) }
23
31
  ;
24
32
 
25
33
  entry : entry_head assignment_list
26
- RBRACE { val[0].properties = val[1]; result = val[0] }
34
+ RBRACE { val[0].merge val[1]; result = val[0] }
27
35
  | entry_head assignment_list /* Allow additional comma after last entry */
28
- COMMA opt_space RBRACE { val[0].properties = val[1]; result = val[0] }
36
+ COMMA opt_space RBRACE { val[0].merge val[1]; result = val[0] }
29
37
  | entry_head RBRACE { result = val[0] } /* Empty entry */
30
38
  ;
31
39
 
@@ -39,49 +47,60 @@ rule
39
47
  ;
40
48
 
41
49
  key_name : { result = "" }
42
- | CHARACTER
43
- | WORD
44
- | key_name CHARACTER { result << val[1] }
45
- | key_name WORD { result << val[1] }
50
+ | key_name_token { result = val[0] }
51
+ | key_name key_name_token { result << val[1] }
46
52
  ;
47
53
 
54
+ key_name_token : WORD
55
+ | CHARACTER
56
+ | number_as_str
57
+ ;
58
+
59
+
48
60
  assignment_list : assignment { result = val[0] }
49
61
  | assignment_list COMMA opt_space
50
62
  assignment { result.merge!(val[3]) }
51
63
  ;
52
64
 
53
65
  assignment : assignment_lhs opt_space
54
- EQUALS opt_space
55
- value opt_space { result = {val[0].downcase.to_sym => val[4]} }
66
+ EQUALS opt_space
67
+ value opt_space { result = {val[0].downcase.to_sym => val[4]} }
56
68
  ;
57
69
 
58
70
  assignment_lhs : WORD
59
71
  ;
60
72
 
61
- value : LBRACE value_content RBRACE { result = val[1] }
62
- | SINGLE_ANFZ value_content_single_del SINGLE_ANFZ { result = val[1] }
63
- | DOUBLE_ANFZ value_content_double_del DOUBLE_ANFZ { result = val[1] }
64
- | WORD
73
+ value : value_element { result = Value.new(val[0]) }
74
+ | value opt_space
75
+ CONCATENATION opt_space
76
+ value { result = val[0] + val[4] }
77
+ ;
78
+
79
+ value_element : LBRACE value_content RBRACE { result = val[1] }
80
+ | SINGLE_QUOTE value_content_single_del SINGLE_QUOTE { result = val[1] }
81
+ | DOUBLE_QUOTE value_content_double_del DOUBLE_QUOTE { result = val[1] }
82
+ | WORD { result = AbbreviationRef.new(val[0]) }
83
+ | NUMBER { result = val[0] }
65
84
  ;
66
85
 
67
86
  brace : LBRACE
68
87
  | RBRACE
69
88
  ;
70
89
 
71
- anfz : SINGLE_ANFZ
72
- | DOUBLE_ANFZ
90
+ anfz : SINGLE_QUOTE
91
+ | DOUBLE_QUOTE
73
92
  ;
74
93
 
75
94
  value_content_single_del : { result = "" }
76
95
  | value_content general_vc_token { result << val[1] }
77
96
  | value_content brace { result << val[1] }
78
- | value_content DOUBLE_ANFZ { result << val[1] }
97
+ | value_content DOUBLE_QUOTE { result << val[1] }
79
98
  ;
80
99
 
81
100
  value_content_double_del : { result = "" }
82
101
  | value_content general_vc_token { result << val[1] }
83
102
  | value_content brace { result << val[1] }
84
- | value_content SINGLE_ANFZ { result << val[1] }
103
+ | value_content SINGLE_QUOTE { result << val[1] }
85
104
  ;
86
105
 
87
106
  value_content : { result = "" }
@@ -93,12 +112,17 @@ rule
93
112
 
94
113
  general_vc_token : SPACE
95
114
  | WORD
115
+ | STRING
96
116
  | COMMA
97
117
  | EQUALS
98
118
  | CHARACTER
119
+ | CONCATENATION
99
120
  | AT
121
+ | number_as_str
100
122
  ;
101
123
 
124
+ number_as_str : NUMBER { result = val[0].to_s }
125
+
102
126
  opt_space_element :
103
127
  | space_element
104
128
  ;
@@ -117,37 +141,7 @@ end
117
141
 
118
142
  ---- header ----
119
143
 
120
- module BibTeX
121
- Entry = Struct.new(:type, :key, :properties)
122
-
123
- class Space < String; end
124
-
125
- class Comment
126
- def initialize(comment)
127
- @comment = comment
128
- end
129
-
130
- def to_s
131
- @comment
132
- end
133
-
134
- alias :to_str :to_s
135
- end
136
-
137
- class File
138
- def initialize(*elements)
139
- @elements = elements.compact
140
- end
141
-
142
- def elements
143
- @elements
144
- end
145
-
146
- def entries
147
- @elements.select { | e | e.is_a?Entry }
148
- end
149
- end
150
- end
144
+ require "rbibtex/types"
151
145
 
152
146
  ---- inner ----
153
147
 
@@ -158,7 +152,7 @@ end
158
152
  def parse( str )
159
153
  @q = []
160
154
 
161
- # @yydebug = true
155
+ #@yydebug = true
162
156
 
163
157
  while str.size > 0 do
164
158
  case str
@@ -166,6 +160,8 @@ end
166
160
  @q.push [:SPACE, $&]
167
161
  when /\Acomment/io
168
162
  @q.push [:COMMENT, $&]
163
+ when /\Astring/io
164
+ @q.push [:STRING, $&]
169
165
  when /\A\,/o
170
166
  @q.push [:COMMA, $&]
171
167
  when /\A@/o
@@ -177,11 +173,15 @@ end
177
173
  when /\A\=/o
178
174
  @q.push [:EQUALS, $&]
179
175
  when /\A\'/o
180
- @q.push [:SINGLE_ANFZ, $&]
176
+ @q.push [:SINGLE_QUOTE, $&]
181
177
  when /\A\"/o
182
- @q.push [:DOUBLE_ANFZ, $&]
178
+ @q.push [:DOUBLE_QUOTE, $&]
179
+ when /\A\d+\b/o
180
+ @q.push [:NUMBER, $&.to_i]
183
181
  when /\A\w+/o
184
182
  @q.push [:WORD, $&]
183
+ when /\A#/o
184
+ @q.push [:CONCATENATION, $&]
185
185
  when /\A./o
186
186
  @q.push [:CHARACTER, $&]
187
187
  when /\A\d+/o
@@ -199,4 +199,8 @@ end
199
199
  end
200
200
 
201
201
 
202
# Parser error hook: dumps the raw error arguments for diagnosis and then
# defers to the inherited handler.
#
# args:: whatever the parser runtime passes to the hook; forwarded unchanged.
def on_error(*args)
  # Diagnostics belong on stderr so they can never be mixed into data that
  # the caller writes to stdout.
  warn args.inspect
  super
end
202
206
  ---- footer ----
@@ -0,0 +1,64 @@
1
+ # Some string handling routines for wordwrapping and tabifying.
2
# String helpers for word wrapping, indentation handling and tab expansion.
#
# NOTE: this reopens the core String class, so every string in the process
# gains these methods.
class String
  # Word-wraps each line of the string independently.
  #
  # The leading whitespace of a line is kept as the indent of every line that
  # is produced from it; +width+ limits the text after that indent. A single
  # token longer than +width+ is put on a line of its own, never broken.
  # Will not work correctly if the string contains tab characters.
  #
  # width:: maximum number of characters per line, excluding the indent.
  # Raises RuntimeError when +width+ is negative.
  def wrap(width = 78)
    raise "Invalid width" if width < 0

    wrapped = split("\n", -1).map do |line|
      indent = line[/\A\s*/]
      body = line[/\A\s*(.*)\Z/, 1]
      chunks = ["".dup]
      # Splitting in front of whitespace makes every token carry the blank
      # that precedes it, so appending a token needs no extra separator.
      body.split(/(?=\s+)/).each do |token|
        if chunks.last.length + token.length <= width
          chunks.last << token
        else
          # A new line starts without the separating whitespace.
          chunks << token[/\A\s*(.*)\Z/, 1]
        end
      end
      # The seed chunk stays empty when the line was empty or when the very
      # first token was already too long.
      chunks.shift if chunks.first == ""
      chunks.map { |chunk| indent + chunk }
    end
    wrapped.join("\n")
  end

  # Indents every line except the first one by +indent+ spaces.
  # Returns "" for an empty string.
  def indent_hanging(indent = 2)
    first, rest = split("\n", 2)
    return "" unless first

    rest ? first + "\n" + rest.indent(indent) : first
  end

  # Indents every line by +indent+ spaces.
  def indent(indent = 2)
    gsub(/^/, " " * indent)
  end

  # Replaces every tab with +tab_width+ spaces (no tab-stop alignment).
  def untabify(tab_width = 8)
    gsub(/\t/, " " * tab_width)
  end

  # Removes indentation, taking the amount from the lines after the first one.
  # Will not work correctly if the string contains tab characters.
  #
  # indent:: number of spaces to strip. The default is the smallest number of
  #          leading spaces on any non-blank line after the first. Blank lines
  #          are ignored when measuring, so a blank line in front of an
  #          indented line no longer inflates the result.
  def deindent_hanging(indent = scan(/\n( *)(?=[^ \n])/).flatten.map { |e| e.length }.min)
    deindent(indent)
  end

  # Strips whitespace at the end of every line.
  def remove_trailing_whitespace
    gsub(/\s+$/, "")
  end

  # Removes up to +indent+ leading spaces from every line.
  # Will not work correctly if the string contains tab characters.
  #
  # indent:: number of spaces to strip. The default is the smallest number of
  #          leading spaces on any non-blank line, so the relative indentation
  #          of the lines is preserved. A nil value interpolates to an
  #          open-ended quantifier and strips all leading spaces.
  def deindent(indent = scan(/^ *(?=[^ \n])/).map { |e| e.length }.min)
    gsub(/^ {0,#{indent}}/, "")
  end

  # Turns paragraphs into one-liners. Paragraphs are separated by one or more
  # blank (whitespace-only) lines; leading newlines are dropped. Lines that
  # start with whitespace are left as they are.
  def unwrap
    paragraphs = sub(/\A\n+/i, '').split(/\n(?:\s*\n)+/)
    paragraphs.map { |paragraph|
      # Join a line with its successor only when both start with a
      # non-whitespace character.
      paragraph.gsub(/((?:\n|^)\S[^\n]*)\n(?=\S)/, "\\1 ")
    }.join("\n\n")
  end
end
64
+
@@ -0,0 +1,75 @@
1
module BibTeX

  # Contains modules that can be included in OptionParser parsers and create
  # switches there. Be sure to call super in the initialize methods.
  module Options

    # Module to be included into an OptionParser parser. Creates output, help
    # and version switches.
    #
    # This depends on a VERSION constant being visible when the version switch
    # is used.
    # NOTE(review): the bare VERSION reference is resolved from this module's
    # lexical scope, not from the including class - confirm where the
    # constant is expected to live.
    module ApplicationOptions
      # IO object the application should write to; STDOUT unless -o was given.
      attr_reader :output

      def initialize
        super()

        @output = STDOUT
        # The file is opened as soon as the switch is parsed and is not
        # closed here.
        on("-o", "--output FILENAME", String,
           "Set the output filename. Writes to stdout if nothing is given") { |v| @output = File.open(v, 'w') }
        separator ""
        on("-?", "--help",
           "Show this help") { puts self; exit }
        on("-v", "--version",
           "Output version number and exit") { puts VERSION; exit }
      end
    end

    # Module to be included into an OptionParser parser. Creates the sort-by
    # switch.
    module SortOptions
      # Array of downcased field symbols given on the command line, or nil
      # when the switch was not used.
      attr_reader :sort_by

      def initialize
        super()

        separator ""
        separator "Sorting of entries"
        on("-s", "--sort-by", "--sort_by Field_1,Field2,...", Array,
           "Specify fields that shall be used for sorting of the file") { |v| @sort_by = v.map { |e| e.downcase.to_sym } }
      end

      # Parses like the inherited parse! and additionally rejects an empty
      # sort field list.
      #
      # Returns whatever the inherited parse! returns. The value used to be
      # dropped, so callers always received nil.
      # Raises RuntimeError when the sort switch was given an empty list.
      def parse!(*args)
        remaining = super
        raise "You can't specify an empty list of fields for sorting" if @sort_by and @sort_by.empty?
        remaining
      end
    end

    # Module to be included in an OptionParser parser. Creates top_fields and
    # bottom_fields switches.
    module SortFieldsOptions
      # Field symbols to be written first / last.
      attr_reader :top_fields, :bottom_fields

      def initialize
        super()

        @top_fields = [:author, :title, :year]
        @bottom_fields = [:note, :pdf, :ps, :www]

        separator ""
        separator "Sorting of fields"
        on("-t", "--top-fields", "--top_fields Field_1,Field2,...", Array,
           "Specify a list of fields that are written first in the output") { |v|
          @top_fields = v.map { |e| e.downcase.to_sym }
          # A field cannot be in both lists; the list set last wins.
          @bottom_fields -= @top_fields
        }
        on("-b", "--bottom-fields", "--bottom_fields Field_1,Field2,...", Array,
           "Specify a list of fields that are written last in the output") { |v|
          @bottom_fields = v.map { |e| e.downcase.to_sym }
          @top_fields -= @bottom_fields
        }
      end

    end

  end
end
@@ -0,0 +1,344 @@
1
+ # TODO: Do not modify base class. (I want selector namespaces :-(
2
+ require "rbibtex/string"
3
+
4
+ module BibTeX
5
# A single bibliography entry: a type, a citation key and a set of
# properties. Property names are case-insensitive; they are stored as
# downcased symbols. The pseudo properties :type and :key map onto the
# corresponding attributes.
class Entry
  attr_accessor :type, :key

  # Canonical form of a property name: a downcased symbol.
  def self.norm_key(key)
    key.to_s.downcase.to_sym
  end

  # Preferred spelling of property names. Unknown names are looked up by
  # their normalized form and fall back to the capitalized name.
  CAPITALIZATION = Hash.new { | h, k |
    if k == Entry.norm_key(k)
      k.to_s.capitalize
    else
      h[Entry.norm_key(k)]
    end
  }
  CAPITALIZATION.merge!({
    :additional => "additional",
    :booktitle  => "BookTitle",
    :isbn       => "isbn",
    :issn       => "issn",
    :pdf        => "pdf",
    :ps         => "ps"
  })

  # type::       entry type
  # key::        citation key
  # properties:: hash of property name => value; names are normalized
  def initialize(type, key, properties = {})
    @properties = properties.inject({}) { | r, (k, v) | r[norm_key(k)] = v; r }
    @key = key
    @type = type
  end

  # Exposes properties as methods: +entry.title+ reads an existing property,
  # +entry.title = v+ writes any property.
  def method_missing(method, *args)
    if args.empty? && has_key?(method)
      self[method]
    elsif args.length == 1 && /\A(.*)=\Z/ =~ method.to_s
      self[$1] = args[0]
    else
      super
    end
  end

  # Keeps respond_to? in line with method_missing: readers exist for present
  # properties, writers for every name. (The former respond_to? override
  # mistook the include_private flag for an argument count and therefore
  # never reported the writers.)
  def respond_to_missing?(method, include_private = false)
    has_key?(method) || method.to_s.end_with?("=") || super
  end

  # Returns a new entry of the same class whose property values have been
  # cleaned element by element (see normalize_element). Values must respond
  # to +map+.
  def normalize
    cleaned = @properties.inject({}) { | r, (k, v) |
      r[k] = v.map { | ve | normalize_element(ve) }
      r
    }
    self.class.new(type, key, cleaned)
  end

  # True when a property of that name (in any capitalization) is set.
  def has_key?(key)
    @properties.has_key?(norm_key(key))
  end

  # Names of all set properties plus the pseudo properties :type and :key.
  def keys
    @properties.keys + [:type, :key]
  end

  # Assigns every pair of +hash+ to this entry; returns +hash+.
  def merge(hash)
    hash.each do | (k, v) |
      self[k] = v
    end
  end

  # Iterates over the properties (not over :type and :key).
  def each(&block)
    @properties.each(&block)
  end
  include Enumerable

  # Reads a property, :key or :type; the name is case-insensitive.
  def [](key)
    name = norm_key(key)
    case name
    when :key  then @key
    when :type then @type
    else @properties[name]
    end
  end

  # Writes a property, :key or :type; the name is case-insensitive.
  def []=(key, value)
    name = norm_key(key)
    case name
    when :key  then @key = value
    when :type then @type = value
    else @properties[name] = value
    end
  end

  private

  def norm_key(*args)
    self.class.norm_key(*args)
  end

  # Cleans one element of a property value:
  # - abbreviation references and non-strings are kept as they are
  # - strings made of digits only become integers
  # - other strings are deindented and unwrapped
  def normalize_element(ve)
    return ve if ve.is_a?(AbbreviationRef) || !ve.is_a?(String)
    # Base 10 is forced: without it "08" raises and "010" is read as octal.
    return Integer(ve, 10) if /\A\d+\Z/ =~ ve

    ve.respond_to?(:deindent_hanging) ? ve.deindent_hanging.unwrap : ve
  end
end
140
+
141
# String subclass used to tag whitespace elements.
class Space < String; end

# String subclass used to tag comment elements.
class Comment < String
  # Returns a new Comment whose text has been deindented (ignoring the first
  # line) and unwrapped into one line per paragraph.
  def normalize
    flattened = deindent_hanging.unwrap
    Comment.new(flattened)
  end
end
148
+
149
# The value of a property: an ordered list of elements (strings, numbers,
# abbreviation references, nested values) that are concatenated.
class Value
  include Enumerable

  attr_reader :elements

  def initialize(*elements)
    @elements = elements
  end

  # Concatenation of the string form of all elements.
  def to_s
    @elements.map { | e | e.to_s }.join("")
  end

  # Returns a new value holding the elements of both operands.
  def +(o)
    self.class.new(*(@elements + o.elements))
  end

  # Iterates over the elements; returns self.
  def each(&block)
    @elements.each(&block)
    self
  end

  # Map each element of the value onto a new value, in place.
  def map!(&block)
    @elements.map!(&block)
  end

  # Create a new Value object where each element is replaced by its mapped
  # element.
  def map(&block)
    Value.new(*@elements.map(&block))
  end

  # BibTeX source form: the elements joined by the concatenation operator.
  # Raises RuntimeError for an element that cannot be rendered.
  def to_bib
    @elements.map { | ve |
      if ve.is_a?(Numeric)
        ve.to_s
      elsif ve.respond_to?(:to_bib)
        ve.to_bib
      elsif ve.is_a?(AbbreviationRef)
        # A reference is written as the bare name. As a String subclass it
        # would otherwise be braced below and become a literal string.
        ve.to_s
      elsif ve.respond_to?(:to_str)
        %({#{ve.to_str}})
      else
        raise "Don't know how to display #{ve.inspect} as a bibtex element"
      end
    }.join(" # ")
  end

  # Create a flat version, where each element that is itself a value is
  # included directly.
  def flatten
    self.class.new(
      *@elements.map { | e | e.respond_to?(:flatten) ? e.flatten : e }.
                 map { | e | e.respond_to?(:elements) ? e.elements : e }.
                 flatten
    )
  end

  # Create a normalized version: the value is flattened and literal strings
  # following each other are joined together. Abbreviation references are
  # never joined with their neighbours, although they are strings too.
  def normalize
    merged = flatten.elements.inject([]) { | r, e |
      if literal?(r.last) && literal?(e)
        r[-1] = r.last + e
      else
        r << e
      end
      r
    }
    self.class.new(*merged)
  end

  private

  # True for strings that are literal text rather than a reference.
  def literal?(e)
    e.is_a?(String) && !e.is_a?(AbbreviationRef)
  end
end
221
+
222
# String subclass used to tag the name of an abbreviation inside a value.
class AbbreviationRef < String; end

# The definition of an abbreviation: a name (+key+) and the text it stands
# for (+string+).
class Abbreviation
  attr_accessor :key, :string

  def initialize(key, string)
    @key, @string = key, string
  end

  # The replacement text as a String. The stored object is converted
  # explicitly, because to_str (an alias of this method) has to return a real
  # String for implicit conversions to work.
  def to_s
    @string.to_s
  end

  alias :to_str :to_s

  # A copy of the abbreviation's name.
  def to_bib
    @key.dup
  end

  # Returns a new abbreviation whose string elements have been deindented and
  # unwrapped. The stored replacement must respond to +map+.
  def normalize
    cleaned = @string.map { | e | e.is_a?(String) ? e.deindent_hanging.unwrap : e }
    self.class.new(@key, cleaned)
  end
end
244
+
245
# An ordered list of the top-level elements of a BibTeX source: entries,
# abbreviations, comments and the whitespace between them.
class BibTeXFile
  include Enumerable

  attr_reader :elements

  # elements:: the elements in source order; nil values are dropped.
  def initialize(*elements)
    @elements = elements.compact
  end

  # Create a file object from a string by running the parser on it.
  def self.parse(string)
    Parser.new.parse(string)
  end

  # Iterates over all elements.
  def each(&block)
    @elements.each(&block)
  end

  # Return all entries in this file.
  def entries
    @elements.select { | e | e.is_a?(Entry) }
  end

  # Normalizes the file and sorts its elements in place: abbreviations first
  # (by name), then comments (by text), then entries, then anything else.
  #
  # When keys are given, entries are ordered by the string form of those
  # properties. When a block is given instead, it is called with each entry
  # and its result is used as the sort key. (The block used to be accepted
  # but ignored.)
  #
  # Raises RuntimeError when both keys and a block are given.
  # Returns the sorted element list.
  def sort_by(*keys)
    raise "Give either a block or keys to sort on" if block_given? and !keys.empty?
    normalize!
    @elements = @elements.sort_by { | element |
      case element
      when Abbreviation
        [0, element.key]
      when Comment
        [1, element.to_s]
      when Entry
        [2, block_given? ? yield(element) : keys.map { | k | (element[k] || "").to_s }]
      else
        [10]
      end
    }
  end

  # Return an array with the abbreviations in this file.
  def abbreviations
    @elements.select { | element | element.is_a?(Abbreviation) }
  end

  # Get an abbreviation by name (case-insensitive); nil when undefined.
  def abbreviation(key)
    wanted = norm_key(key)
    abbreviations.find { | a | norm_key(a.key) == wanted }
  end

  # Check if an abbreviation is defined.
  def has_abbreviation?(key)
    !abbreviation(key).nil?
  end

  # - Remove whitespace elements from the element list
  # - Normalize every element that supports it; other elements are kept
  #   unchanged (they used to be replaced by nil)
  def normalize!
    @elements = @elements.reject { | e | e.is_a?(Space) }
    @elements.map! { | e | e.respond_to?(:normalize) ? e.normalize : e }
  end

  # Replace abbreviation references by the text they stand for. References
  # to abbreviations that are not defined in this file (for example the
  # predefined ones like jan, feb) are left alone.
  def interpolate!
    entries.each do | entry |
      entry.each do | (k, v) |
        interpolated = v.map { | e |
          definition = e.is_a?(AbbreviationRef) ? abbreviation(e) : nil
          definition ? definition.string : e
        }
        shrunk = interpolated.normalize
        # Only existing properties are reassigned, so the entry can be
        # updated while it is being iterated.
        entry[k] = Value.new(*shrunk)
      end
    end
  end

  private

  def norm_key(key)
    key.to_s.downcase.to_sym
  end
end
342
+ end
343
+
344
+
data/lib/rbibtex.rb CHANGED
@@ -1 +1,8 @@
1
1
  require "rbibtex/rbibtex.tab"
2
+
3
module BibTeX
  # Convenience function: runs a fresh Parser over +string+ and returns the
  # parser's result.
  def self.parse(string)
    parser = Parser.new
    parser.parse(string)
  end
end