RubyGems - aurels-rbib - Versions diffs - 1.0.2 - Mend

aurels-rbib 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

data/README ADDED

@@ -0,0 +1,4 @@
+A parser for BibTeX files, written in Ruby. It has no dependencies (except
+Test::Unit, which is needed only to run the tests).
+By Nick Gasson (http://www.doof.me.uk)

data/bibtex/bibliography.rb ADDED

@@ -0,0 +1,45 @@
+require 'bibtex/entry'
+module BibTeX
+  class Bibliography
+    attr_reader :entries
+    def initialize
+      @entries = {}
+    end
+    def <<(e)
+      if e.kind_of? Entry then
+        @entries[e.key] = e
+      else
+        raise 'Cannot add non-entries to bibliography'
+      end
+    end
+    def [](key)
+      @entries[key] or raise "No entry #{key}"
+    end
+    # Transform the entries in some way and return a
+    # new bibliography
+    def map
+      r = Bibliography.new
+      @entries.each do |k, e|
+        r << yield(e)
+      end
+      return r
+    end
+    def save(filename)
+      f = File.new(filename, 'w')
+      f.puts self.to_s
+      f.close
+    end
+    def to_s
+      @entries.keys.sort.collect { |k| @entries[k].to_s }.join
+    end
+  end
+end

data/bibtex/entry.rb ADDED

@@ -0,0 +1,70 @@
+module BibTeX
+  # A single entry in a bibliography
+  class Entry
+    attr_reader :type, :key
+    def initialize(type, key)
+      @type = type
+      @key = key
+      @fields = {}
+    end
+    def add_field(obj, value = nil)
+      if obj.kind_of? Field then
+        @fields[obj.key] = obj
+      else
+        @fields[obj] = Field.new(obj, value)
+      end
+    end
+    def [](key)
+      f = @fields[key]
+      if f then
+        f.value
+      else
+        raise "No field with key #{key}"
+      end
+    end
+    def to_s
+      fs = @fields.collect { |k, f| "  #{f.to_s}" }.sort.join ",\n"
+      "@#{@type}{#{@key},\n#{fs}\n}\n\n"
+    end
+    def reject_fields(keys)
+      r = Entry.new(@type, @key)
+      @fields.each do |k, f|
+        r.add_field f unless keys.index k
+      end
+      return r
+    end
+    def select_fields(keys)
+      r = Entry.new(@type, @key)
+      @fields.each do |k, f|
+        r.add_field f if keys.index k
+      end
+      return r
+    end
+  end
+  # Different types of entries
+  module EntryType
+    Book = 'book'
+    Article = 'article'
+    Booklet = 'booklet'
+    Conference = 'conference'
+    InBook = 'inbook'
+    InCollection = 'incollection'
+    InProceedings = 'inproceedings'
+    Manual = 'manual'
+    MastersThesis = 'mastersthesis'
+    Misc = 'misc'
+    PhDThesis = 'phdthesis'
+    Proceedings = 'proceedings'
+    TechReport = 'techreport'
+    Unpublished = 'unpublished'
+  end
+end

data/bibtex/field.rb ADDED

@@ -0,0 +1,17 @@
+module BibTeX
+  # A field within an entry E.g. author = {Foo}
+  class Field
+    attr_reader :key, :value
+    def initialize(key, value)
+      @key = key
+      @value = value
+    end
+    def to_s
+      "#{@key} = {#{@value}}"
+    end
+  end
+end

data/bibtex/lexer.rb ADDED

@@ -0,0 +1,123 @@
+require 'strscan'
+module BibTeX
+  class SourcePos
+    attr_reader :line, :column, :file
+    def initialize(line, column, file)
+      @line = line
+      @column = column
+      @file = file
+    end
+    def to_s
+      "#{file}:#{line}"
+    end
+  end
+  class RuleSet
+    def initialize
+      @rules = []
+    end
+    def match(regexp, result)
+      @rules << [regexp, result]
+    end
+    def literals(words)
+      words.each do |w|
+        match /#{w}/, w
+      end
+    end
+    def each
+      @rules.each do |pair|
+        yield pair[0], pair[1]
+      end
+    end
+  end
+  class LexerError < RuntimeError
+    attr_reader :src_pos
+    def initialize(mess, src_pos)
+      super(mess)
+      @src_pos = src_pos
+    end
+  end
+  class Lexer
+    attr_reader :lval, :ignore_whitespace
+    attr_accessor :ignore_newlines, :file_name
+    def initialize(ignore_whitespace = false)
+      @scanner = StringScanner.new('')
+      @rules = RuleSet.new
+      @ignore_whitespace = ignore_whitespace
+      @ignore_newlines = ignore_whitespace
+      @lineno = 1
+      @file_name = '<unknown>'
+      yield @rules
+    end
+    # ignore_whitespace turns on ignore_newlines too
+    def ignore_whitespace=(b)
+      @ignore_whitespace = b
+      @ignore_newlines = b
+    end
+    def feed(str)
+      @scanner = StringScanner.new(str)
+      @cols_prev = 0
+    end
+    def src_pos
+      SourcePos.new(@lineno, @scanner.pos - @cols_prev, @file_name)
+    end
+    def next_token!
+      if @scanner.check /^\s*\n/ then
+        @lineno += 1
+        @cols_prev = @scanner.pos + 1
+      end
+      skip_whitespace
+      @rules.each do |regexp, result|
+        return result if @lval = @scanner.scan(regexp)
+      end
+      unexpect = if @scanner.rest.length < 10 then
+                   @scanner.rest
+                 else
+                   "#{@scanner.rest.first 10}..."
+                 end
+      raise LexerError.new("Unexpected input #{unexpect}", src_pos)
+    end
+    def peek_token
+      tok = self.next_token!
+      @scanner.unscan
+      return tok
+    end
+    def peek_lval
+      peek_token
+      @lval
+    end
+    def more_tokens?
+      skip_whitespace
+      not @scanner.eos?
+    end
+    private
+    def skip_whitespace
+      if @ignore_newlines and @ignore_whitespace then
+        @scanner.skip /\s+/
+      elsif @ignore_whitespace then
+        @scanner.skip /[ \t\r]+/
+      elsif @ignore_newlines  then
+        @scanner.skip /[\r\n]+/
+      end
+    end
+  end
+end

data/bibtex/parser.rb ADDED

@@ -0,0 +1,116 @@
+require 'bibtex/bibliography'
+require 'bibtex/entry'
+require 'bibtex/field'
+require 'bibtex/lexer'
+module BibTeX
+  class Parser
+    def self.parse(filename)
+      parse_string File.read(filename)
+    end
+    def self.parse_string(data)
+      @lexer.feed data
+      b = Bibliography.new
+      while @lexer.more_tokens?
+        b << parse_entry
+      end
+      return b
+    end
+    private
+    def self.parse_entry
+      expect :at, '@'
+      type = expect :id
+      expect :lbrace, '{'
+      key = expect :id
+      e = Entry.new(type, key)
+      while @lexer.peek_token != :rbrace
+        expect :comma, ','
+        e.add_field parse_field
+      end
+      expect :rbrace, '}'
+      return e
+    end
+    def self.parse_field
+      key = expect :id
+      expect :equals, '='
+      value = parse_value
+      Field.new(key.intern, value)
+    end
+    def self.parse_value
+      close = :rbrace
+      brace_count = 1
+      if @lexer.peek_token == :dquote then
+        expect :dquote
+        close = :dquote
+      elsif @lexer.peek_token == :lbrace then
+        expect :lbrace, '{'
+      else
+        # Not surrounded by quotes or braces
+        brace_count = 0
+      end
+      str = ''
+      @lexer.ignore_whitespace = false
+      @lexer.ignore_newlines = true
+      loop do
+        unless @lexer.more_tokens?
+          raise 'Unexpected end of input'
+        end
+        if (@lexer.peek_token == :comma \
+            or @lexer.peek_token == :rbrace) and brace_count == 0 then
+          # A field not delimited by "" or {}
+          @lexer.ignore_whitespace = true
+          return str
+        end
+        case @lexer.next_token!
+        when :rbrace, close
+          brace_count -= 1
+          if brace_count == 0 then
+            @lexer.ignore_whitespace = true
+            return str
+          else
+            str += '}'
+          end
+        when :lbrace
+          str += '{'
+          brace_count += 1
+        else
+          str += @lexer.lval
+        end
+      end
+    end
+    def self.expect(token, pretty = nil)
+      pretty ||= token.to_s
+      got = @lexer.next_token!
+      unless got == token then
+        raise "#{@lexer.src_pos}: Expected '#{pretty}' but found '#{got}' (text='#{@lexer.lval}')"
+      else
+        @lexer.lval
+      end
+    end
+    @lexer = Lexer.new(true) do |rules|
+      rules.match /@/, :at
+      rules.match /\{/, :lbrace
+      rules.match /\}/, :rbrace
+      rules.match /\"/, :dquote
+      rules.match /\=/, :equals
+      rules.match /\,/, :comma
+      rules.match /[\w\-_:&]+/, :id
+      rules.match /.+?/, :cdata
+    end
+  end
+end

data/bibtex/test_bibliography.rb ADDED

@@ -0,0 +1,76 @@
+require 'tempfile'
+require 'test/unit'
+require 'bibtex/bibliography'
+class TestBibliography < Test::Unit::TestCase
+  include BibTeX
+  def setup
+    @b = Bibliography.new
+    @foo01 = Entry.new(EntryType::Book, 'foo01')
+    @foo01.add_field :author, 'C. Doof'
+    @foo01.add_field :year, 2007
+    @foo01.add_field Field.new(:url, 'www.doof.me.uk')
+    @bar99 = Entry.new(EntryType::Article, 'bar99')
+    @bar99.add_field :author, 'N. Cakesniffer'
+    @bar99.add_field :year, 1999
+    @bar99.add_field Field.new(:url, 'www.cakesniffer.co.uk')
+    @b << @foo01
+    @b << @bar99
+  end
+  def test_basic
+    assert_equal 2, @b.entries.length
+    assert_equal @foo01, @b['foo01']
+  end
+  def test_map
+    expect = <<END
+@article{bar99,
+  author = {N. Cakesniffer},
+  year = {1999}
+}
+@book{foo01,
+  author = {C. Doof},
+  year = {2007}
+}
+END
+    urlless = @b.map do |e|
+      e.reject_fields [:url]
+    end
+    assert_equal expect, urlless.to_s
+  end
+  def test_to_s
+    expect = <<END
+@article{bar99,
+  author = {N. Cakesniffer},
+  url = {www.cakesniffer.co.uk},
+  year = {1999}
+}
+@book{foo01,
+  author = {C. Doof},
+  url = {www.doof.me.uk},
+  year = {2007}
+}
+END
+    assert_equal expect, @b.to_s
+  end
+  def test_save
+    fname = '/tmp/_test.bib'
+    @b.save fname
+    f = File.new(fname)
+    assert_equal @b.to_s, f.read
+    f.close
+    File.delete fname
+  end
+end