Chrononaut-treetop-dcf 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown ADDED
@@ -0,0 +1,34 @@
1
+ # treetop-dcf
2
+
3
+ Easy to use parser for
4
+ [Debian Control
5
+ Files](http://www.debian.org/doc/debian-policy/ch-controlfields.html).
6
+ The format is very similar to YAML, but with some subtle differences (e.g.
7
+ YAML requires a space after field names, DCF does not).
8
+
9
+ Parsed paragraphs are returned as hashes in an array. Example:
10
+
11
+ irb(main):001:0> require "dcf"
12
+ => true
13
+ irb(main):002:0> Dcf.parse "Attr: Value"
14
+ => [{"Attr"=>"Value"}]
15
+
16
+ The parser is currently quite slow, so if the files you're parsing allow it
17
+ I'd recommend using this YAML based parser instead: http://gist.github.com/117293
18
+ It's much faster but fails on certain valid files.
19
+
20
+ ## Installation
21
+
22
+ sudo gem install Chrononaut-treetop-dcf
23
+
24
+ ## Acknowledgements
25
+
26
+ This project was created during Google Summer of Code 2009, as part of my project
27
+ for the [R Foundation for Statistical
28
+ Computing](http://www.r-project.org/foundation/main.html). (CRAN uses DCF for
29
+ package information, and we needed a parser written in Ruby.) Therefore, a big
30
+ thanks goes out to both the R Foundation and Google.
31
+
32
+ ## Author
33
+
34
+ Bjørn Arild Mæland <bjorn.maeland at gmail.com>
data/lib/dcf.rb ADDED
@@ -0,0 +1,19 @@
1
+ require "rubygems"
2
+ require "treetop"
3
+ require File.join(File.dirname(__FILE__), "dcf_grammar")
4
+
5
module Dcf
  # Parses Debian Control File text.
  #
  # input - the DCF text handed to DcfParser#parse.
  #
  # Returns an Array holding one Hash per paragraph, each mapping an
  # attribute name to its value (both taken from the nodes' text_value).
  # Returns nil when the parser yields nil for the input.
  def self.parse(input)
    tree = DcfParser.new.parse(input)
    return nil if tree.nil?

    tree.elements.map do |node|
      # Fold every field row of the paragraph into a single hash; a later
      # row with the same attribute name overwrites an earlier one.
      node.paragraph.elements.inject({}) do |fields, row|
        fields[row.field.attribute.text_value] = row.field.value.text_value
        fields
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
# Treetop grammar for Debian Control Files: paragraphs built from
# "Attribute: value" fields.
grammar Dcf
  # Any number of paragraphs, each optionally followed by one more line break.
  rule paragraphs
    (paragraph eol?)*
  end

  # One or more fields, each optionally terminated by a line break.
  rule paragraph
    (field eol?)+
  end

  # An attribute, the separator, the value, then optional trailing spaces/tabs.
  rule field
    attribute separator value white*
  end

  # Attribute names may not contain a line break, a colon or a space.
  rule attribute
    (!eol !":" !" " .)+
  end

  # A value may start on the line after the separator, and may continue on
  # following lines as long as each of them begins with a space or tab.
  rule value
    eol? valid_value_symbols (eol white+ valid_value_symbols)*
    {
      # Returns the value as a single line: surrounding whitespace is
      # stripped, and every line break together with the spaces/tabs that
      # follow it becomes exactly one space.
      def text_value
        # "\r\n" has to be tried before the single-character alternatives,
        # otherwise a CRLF break is replaced twice and leaves two spaces.
        # Tabs are consumed as well because the white rule accepts them as
        # continuation indentation.
        super.strip.gsub(/(?:\r\n|[\r\n])[ \t]*/, ' ')
      end
    }
  end

  # Everything up to (not including) the next line break.
  rule valid_value_symbols
    (!eol .)+
  end

  # The colon after an attribute name plus any spaces/tabs that follow it.
  rule separator
    ':' white*
  end

  # A line break: CRLF, or a lone LF or CR.
  rule eol
    "\r\n" / [\n\r]
  end

  # A single space or tab.
  rule white
    [ \t]
  end
end
data/test/helper.rb ADDED
@@ -0,0 +1,6 @@
1
+ require 'rubygems'
2
+
3
+ require File.join(File.dirname(__FILE__), *%w[.. lib dcf])
4
+
5
+ require 'test/unit'
6
+ require 'shoulda'
data/test/suite.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'test/unit'
2
+
3
+ # for some reason these tests fail when run via TextMate
4
+ # but succeed when run on the command line.
5
+
6
# Require every test_*.rb file found in this file's directory.
Dir["#{File.dirname(__FILE__)}/test_*.rb"].each { |test_file| require test_file }
data/test/test_dcf.rb ADDED
@@ -0,0 +1,127 @@
1
+ # -*- coding: utf-8 -*-
2
+ require File.dirname(__FILE__) + '/helper'
3
+
4
# Shoulda-style tests for Dcf.parse.
#
# Cases covered: an attribute name containing a space (parse is expected to
# return nil), a full description file, a value that starts on the line after
# its attribute, a single entry, trailing whitespace, values containing
# colons, several paragraphs, multi-line values, and non-ASCII data.
# In the non-ASCII case, when RUBY_VERSION matches /1.9/, both the sample and
# the expected string are forced to binary encoding before they are used.
#
# NOTE(review): the heredoc fixtures contain continuation lines, which the
# grammar only accepts when they begin with a space or tab -- confirm that
# this leading whitespace is present in the checked-in file.
+ class TestDcfParser < Test::Unit::TestCase
5
+ should "not accept spaces in keys" do
6
+ assert_nil Dcf.parse("As df: Value\n")
7
+ end
8
+
9
+ should "parse a description file" do
10
+ description = <<EOF
11
+ Package: RSQLite
12
+ Version: 0.7-1
13
+ Title: SQLite interface for R
14
+ Author: David A. James
15
+ Maintainer: Seth Falcon <seth@userprimary.net>
16
+ Description: Database Interface R driver for SQLite. This package
17
+ embeds the SQLite database engine in R and provides an
18
+ interface compliant with the DBI package. The source for the
19
+ SQLite engine (version 3.6.4) is included.
20
+ LazyLoad: yes
21
+ Depends: R (>= 2.6.0), methods, DBI (>= 0.2-3)
22
+ Imports: methods, DBI (>= 0.2-3)
23
+ License: LGPL (>= 2)
24
+ Collate: zzz.R S4R.R dbObjectId.R SQLite.R SQLiteSupport.R
25
+ Packaged: Sat Oct 25 18:54:05 2008; seth
26
+ Repository: CRAN
27
+ Date/Publication: 2008-10-26 18:29:06
28
+ EOF
29
+ sqlite = Dcf.parse(description).first
30
+ assert_equal 14, sqlite.keys.length
31
+ assert_equal "RSQLite", sqlite["Package"]
32
+ assert_equal "2008-10-26 18:29:06", sqlite["Date/Publication"]
33
+ assert_equal "zzz.R S4R.R dbObjectId.R SQLite.R SQLiteSupport.R", sqlite["Collate"]
34
+ end
35
+
36
+ should "parse badly formatted description files" do
37
+ description = <<EOF
38
+ Package: StatFingerprints
39
+ Version: 1.3
40
+ Depends:
41
+ rgl,car,MASS,ffmanova,akima,labdsv,RColorBrewer,vegan,tcltk2,tcltk,maptools,sciplot
42
+ EOF
43
+ stat = Dcf.parse(description).first
44
+ assert_equal "rgl,car,MASS,ffmanova,akima,labdsv,RColorBrewer,vegan,tcltk2,tcltk,maptools,sciplot", stat["Depends"]
45
+ end
46
+
47
+ should "parse a simple entry" do
48
+ parse = Dcf.parse("Attr: Value\n").first
49
+ assert_equal ["Attr"], parse.keys
50
+ assert_equal "Value", parse["Attr"]
51
+ end
52
+
53
+ should "ignore trailing whitespace" do
54
+ parse = Dcf.parse("Attr: Value \n").first
55
+ assert_equal "Value", parse["Attr"]
56
+ end
57
+
58
+ should "parse values that contains colons" do
59
+ sample = <<EOF
60
+ Package: BCE
61
+ Version: 1.3
62
+ Title: Bayesian composition estimator: estimating sample (taxonomic)
63
+ composition from biomarker data
64
+ Author: Karel Van den Meersche <k.vdmeersche@nioo.knaw.nl>, Karline
65
+ Soetaert <k.soetaert@nioo.knaw.nl>
66
+ Maintainer: Karel Van den Meersche <k.vdmeersche@nioo.knaw.nl>
67
+ Depends: R (>= 2.01), limSolve
68
+ Description: Function to estimates taxonomic compositions from
69
+ biomarker data, using a Bayesian approach.
70
+ License: GPL
71
+ LazyData: yes
72
+ Repository: CRAN
73
+ Repository/R-Forge/Project: bce
74
+ Repository/R-Forge/Revision: 43
75
+ Date/Publication: 2009-06-03 20:45:44
76
+ Packaged: 2009-06-03 02:59:04 UTC; rforge
77
+ EOF
78
+ parse = Dcf.parse(sample).first
79
+ assert_equal "Bayesian composition estimator: estimating sample (taxonomic) " +
80
+ "composition from biomarker data", parse["Title"]
81
+ end
82
+
83
+ should "parse multiple paragraphs" do
84
+ sample = <<EOF
85
+ Package: AIGIS
86
+ Version: 1.0
87
+ Depends: R (>= 2.5.1), gpclib (>= 1.4)
88
+
89
+ Package: AIS
90
+ Version: 1.0
91
+ Depends: R (>= 2.1.0)
92
+ Suggests: R2HTML
93
+ EOF
94
+ parse = Dcf.parse(sample)
95
+ assert_equal "AIGIS", parse[0]["Package"]
96
+ assert_equal "R (>= 2.5.1), gpclib (>= 1.4)", parse[0]["Depends"]
97
+ assert_equal "AIS", parse[1]["Package"]
98
+ end
99
+
100
+ should "parse multiline field values" do
101
+ sample = <<EOF
102
+ Package: ADaCGH
103
+ Version: 1.3-10
104
+ Depends: R (>= 2.2.1), cgh, tilingArray, aCGH, cghMCR, papply, GDD,
105
+ waveslim, cluster, snapCGH, Hmisc
106
+ Suggests: Rmpi, GLAD, DNAcopy
107
+ EOF
108
+ parse = Dcf.parse(sample).first
109
+ assert_equal "R (>= 2.2.1), cgh, tilingArray, aCGH, cghMCR, papply, GDD, " +
110
+ "waveslim, cluster, snapCGH, Hmisc",
111
+ parse["Depends"]
112
+ end
113
+
114
+ should "parse file with UTF-8 data" do
115
+ sample = <<EOF
116
+ Encoding: latin1
117
+ Packaged: Tue May 26 10:40:31 2009; Grömping
118
+ Repository: CRAN
119
+ Date/Publication: 2009-05-26 09:23:28
120
+ EOF
121
+ sample = (RUBY_VERSION =~ /1.9/) ? sample.force_encoding("binary") : sample
122
+ match = (RUBY_VERSION =~ /1.9/) ? "Grömping".force_encoding("binary") : "Grömping"
123
+ parse = Dcf.parse(sample).first
124
+ assert_not_nil parse
125
+ assert_equal "Tue May 26 10:40:31 2009; #{match}", parse["Packaged"]
126
+ end
127
+ end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: Chrononaut-treetop-dcf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Bjorn Arild Maeland
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-06-06 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: treetop
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.5
24
+ version:
25
+ description: Easy to use parser for Debian Control Files.
26
+ email: bjorn.maeland@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.markdown
33
+ files:
34
+ - lib/dcf.rb
35
+ - lib/dcf_grammar.treetop
36
+ - README.markdown
37
+ has_rdoc: false
38
+ homepage: http://github.com/Chrononaut/treetop-dcf
39
+ post_install_message:
40
+ rdoc_options:
41
+ - --charset=UTF-8
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ requirements: []
57
+
58
+ rubyforge_project:
59
+ rubygems_version: 1.2.0
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: Easy to use parser for Debian Control Files.
63
+ test_files:
64
+ - test/helper.rb
65
+ - test/suite.rb
66
+ - test/test_dcf.rb