treetop-dcf 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +34 -0
- data/lib/dcf.rb +24 -0
- data/lib/dcf_grammar.treetop +43 -0
- data/test/helper.rb +6 -0
- data/test/suite.rb +9 -0
- data/test/test_dcf.rb +128 -0
- metadata +68 -0
data/README.markdown
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# treetop-dcf
|
2
|
+
|
3
|
+
Easy to use parser for
|
4
|
+
[Debian Control
|
5
|
+
Files](http://www.debian.org/doc/debian-policy/ch-controlfields.html).
|
6
|
+
The format is very similar to YAML, but with some subtle differences (e.g.
|
7
|
+
YAML requires a space after the colon that follows a field name; DCF does not).
|
8
|
+
|
9
|
+
Parsed paragraphs are returned as hashes in an array. Example:
|
10
|
+
|
11
|
+
irb(main):001:0> require "dcf"
|
12
|
+
=> true
|
13
|
+
irb(main):002:0> Dcf.parse "Attr: Value"
|
14
|
+
=> [{"Attr"=>"Value"}]
|
15
|
+
|
16
|
+
The parser is currently quite slow, so if the files you're parsing allow it,
|
17
|
+
I'd recommend using this YAML-based parser instead: http://gist.github.com/117293
|
18
|
+
It's much faster but fails on certain valid files.
|
19
|
+
|
20
|
+
## Installation
|
21
|
+
|
22
|
+
sudo gem install treetop-dcf
|
23
|
+
|
24
|
+
## Acknowledgements
|
25
|
+
|
26
|
+
This project was created during Google Summer of Code 2009, as part of my project
|
27
|
+
for the [R Foundation for Statistical
|
28
|
+
Computing](http://www.r-project.org/foundation/main.html). (CRAN uses DCF for
|
29
|
+
package information, and we needed a parser written in Ruby.) Therefore, a big
|
30
|
+
thanks goes out to both the R Foundation and Google.
|
31
|
+
|
32
|
+
## Author
|
33
|
+
|
34
|
+
Bjørn Arild Mæland <bjorn.maeland at gmail.com>
|
data/lib/dcf.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "treetop"
|
3
|
+
require File.join(File.dirname(__FILE__) + "/dcf_grammar")
|
4
|
+
|
5
|
+
# Entry point for parsing Debian Control File (DCF) text with the
# Treetop-generated DcfParser.
module Dcf
  # Parses DCF text into one hash per paragraph.
  #
  # @param [String] input
  # @return [Array, nil] An array of { attr => val } hashes or nil if failure
  def self.parse(input)
    tree = DcfParser.new.parse(input)
    # The parser result is nil when the input could not be parsed.
    return if tree.nil?

    tree.elements.map do |node|
      fields = {}
      node.paragraph.elements.each do |row|
        field = row.field
        # A repeated attribute within one paragraph keeps the last value seen.
        fields[field.attribute.text_value] = field.value.text_value
      end
      fields
    end
  end

  # @return [String] the version of this library
  def self.version
    "0.2.0"
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
grammar Dcf
  # One or more paragraphs, each optionally followed by a newline.
  rule paragraphs
    (paragraph eol?)+
  end

  # One or more fields, each optionally followed by a newline.
  rule paragraph
    (field eol?)+
  end

  # A single "attribute: value" entry.
  rule field
    attribute separator value
  end

  # Field name: one or more characters, none of them a colon or a newline.
  rule attribute
    (!":" !eol .)+
  end

  # Field value. It may begin on the line after the separator, and it may
  # continue on following lines that start with whitespace.
  rule value
    eol? (!eol .)+ (eol white+ (!eol .)+)*
    {
      def text_value
        # Fold the value onto one line: trim the ends, then replace every line
        # break together with the indentation that follows it by one space.
        # Tabs are matched as well as spaces because `white` accepts both as
        # continuation indentation.
        super.strip.gsub(/\n[ \t]*/, ' ')
      end
    }
  end

  # Not referenced by any other rule in this grammar.
  rule next_record
    "\n" [a-zA-Z]
  end

  # Colon between attribute and value, plus any spaces or tabs after it.
  rule separator
    ':' white*
  end

  rule eol
    "\n"
  end

  rule white
    [ \t]
  end
end
|
data/test/helper.rb
ADDED
data/test/suite.rb
ADDED
data/test/test_dcf.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require File.dirname(__FILE__) + '/helper'
|
3
|
+
|
4
|
+
# Tests for Dcf.parse, driven by sample package DESCRIPTION data.
#
# Continuation lines inside the samples begin with whitespace; that is how DCF
# marks a value folded over several lines.
class TestDcfParser < Test::Unit::TestCase

  should "parse a description file" do
    description = <<EOF
Package: RSQLite
Version: 0.7-1
Title: SQLite interface for R
Author: David A. James
Maintainer: Seth Falcon <seth@userprimary.net>
Description: Database Interface R driver for SQLite. This package
  embeds the SQLite database engine in R and provides an
  interface compliant with the DBI package. The source for the
  SQLite engine (version 3.6.4) is included.
LazyLoad: yes
Depends: R (>= 2.6.0), methods, DBI (>= 0.2-3)
Imports: methods, DBI (>= 0.2-3)
License: LGPL (>= 2)
Collate: zzz.R S4R.R dbObjectId.R SQLite.R SQLiteSupport.R
Packaged: Sat Oct 25 18:54:05 2008; seth
Repository: CRAN
Date/Publication: 2008-10-26 18:29:06
EOF
    sqlite = Dcf.parse(description).first
    assert_equal 14, sqlite.keys.length
    assert_equal "RSQLite", sqlite["Package"]
    assert_equal "2008-10-26 18:29:06", sqlite["Date/Publication"]
    assert_equal "zzz.R S4R.R dbObjectId.R SQLite.R SQLiteSupport.R", sqlite["Collate"]
  end

  should "parse badly formatted description files" do
    # The value starts on the line after the attribute.
    description = <<EOF
Package: StatFingerprints
Version: 1.3
Depends:
  rgl,car,MASS,ffmanova,akima,labdsv,RColorBrewer,vegan,tcltk2,tcltk,maptools,sciplot
EOF
    stat = Dcf.parse(description).first
    assert_equal "rgl,car,MASS,ffmanova,akima,labdsv,RColorBrewer,vegan,tcltk2,tcltk,maptools,sciplot", stat["Depends"]
  end

  should "parse a simple entry" do
    parse = Dcf.parse("Attr: Value\n").first
    assert_equal ["Attr"], parse.keys
    assert_equal "Value", parse["Attr"]
  end

  should "ignore trailing whitespace" do
    parse = Dcf.parse("Attr: Value \n").first
    assert_equal "Value", parse["Attr"]
  end

  should "parse values that contains colons" do
    sample = <<EOF
Package: BCE
Version: 1.3
Title: Bayesian composition estimator: estimating sample (taxonomic)
  composition from biomarker data
Author: Karel Van den Meersche <k.vdmeersche@nioo.knaw.nl>, Karline
  Soetaert <k.soetaert@nioo.knaw.nl>
Maintainer: Karel Van den Meersche <k.vdmeersche@nioo.knaw.nl>
Depends: R (>= 2.01), limSolve
Description: Function to estimates taxonomic compositions from
  biomarker data, using a Bayesian approach.
License: GPL
LazyData: yes
Repository: CRAN
Repository/R-Forge/Project: bce
Repository/R-Forge/Revision: 43
Date/Publication: 2009-06-03 20:45:44
Packaged: 2009-06-03 02:59:04 UTC; rforge
EOF
    parse = Dcf.parse(sample).first
    assert_equal "Bayesian composition estimator: estimating sample (taxonomic) " +
      "composition from biomarker data", parse["Title"]
  end

  should "parse multiple paragraphs" do
    # The blank line separates the two paragraphs.
    sample = <<EOF
Package: AIGIS
Version: 1.0
Depends: R (>= 2.5.1), gpclib (>= 1.4)

Package: AIS
Version: 1.0
Depends: R (>= 2.1.0)
Suggests: R2HTML
EOF
    parse = Dcf.parse(sample)
    assert_equal "AIGIS", parse[0]["Package"]
    assert_equal "R (>= 2.5.1), gpclib (>= 1.4)", parse[0]["Depends"]
    assert_equal "AIS", parse[1]["Package"]
  end

  should "parse multiline field values" do
    sample = <<EOF
Package: ADaCGH
Version: 1.3-10
Depends: R (>= 2.2.1), cgh, tilingArray, aCGH, cghMCR, papply, GDD,
  waveslim, cluster, snapCGH, Hmisc
Suggests: Rmpi, GLAD, DNAcopy
EOF
    parse = Dcf.parse(sample).first
    assert_equal "R (>= 2.2.1), cgh, tilingArray, aCGH, cghMCR, papply, GDD, " +
      "waveslim, cluster, snapCGH, Hmisc",
      parse["Depends"]
  end

  should "parse file with UTF-8 data" do
    sample = <<EOF
Author: Vincent Goulet, Sébastien Auclair, Christophe Dutang, Xavier
  Milhaud, Tommy Ouellet, Louis-Philippe Pouliot, Mathieu Pigeon
Encoding: latin1
Packaged: Tue May 26 10:40:31 2009; Grömping
Repository: CRAN
Date/Publication: 2009-05-26 09:23:28
EOF
    # Probe for String#force_encoding instead of pattern-matching
    # RUBY_VERSION: the old /1.9/ check recognised only 1.9.x, so later Rubies
    # skipped the encoding step, and its unescaped dot matched any character.
    match = "Grömping"
    if sample.respond_to?(:force_encoding)
      sample = sample.force_encoding("utf-8")
      match = match.force_encoding("utf-8")
    end
    parse = Dcf.parse(sample).first
    assert_not_nil parse
    assert_match "Sébastien Auclair", parse["Author"]
    assert_equal "Tue May 26 10:40:31 2009; #{match}", parse["Packaged"]
  end

end
|
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: treetop-dcf
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- "Bj\xC3\xB8rn Arild M\xC3\xA6land"
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-28 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: treetop
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.2.5
|
24
|
+
version:
|
25
|
+
description: Easy to use parser for Debian Control Files.
|
26
|
+
email: bjorn.maeland@gmail.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README.markdown
|
33
|
+
files:
|
34
|
+
- lib/dcf.rb
|
35
|
+
- lib/dcf_grammar.treetop
|
36
|
+
- README.markdown
|
37
|
+
has_rdoc: true
|
38
|
+
homepage: http://github.com/Chrononaut/treetop-dcf
|
39
|
+
licenses: []
|
40
|
+
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options:
|
43
|
+
- --charset=UTF-8
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
version:
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
rubyforge_project:
|
61
|
+
rubygems_version: 1.3.5
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: Easy to use parser for Debian Control Files.
|
65
|
+
test_files:
|
66
|
+
- test/helper.rb
|
67
|
+
- test/test_dcf.rb
|
68
|
+
- test/suite.rb
|