treetop-dcf 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +34 -0
- data/lib/dcf.rb +24 -0
- data/lib/dcf_grammar.treetop +43 -0
- data/test/helper.rb +6 -0
- data/test/suite.rb +9 -0
- data/test/test_dcf.rb +128 -0
- metadata +68 -0
data/README.markdown
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# treetop-dcf
|
2
|
+
|
3
|
+
Easy to use parser for
|
4
|
+
[Debian Control
|
5
|
+
Files](http://www.debian.org/doc/debian-policy/ch-controlfields.html).
|
6
|
+
The format is very similar to YAML, but with some subtle differences (e.g.
|
7
|
+
YAML requires a space after field names, DCF does not).
|
8
|
+
|
9
|
+
Parsed paragraphs are returned as hashes in an array. Example:
|
10
|
+
|
11
|
+
irb(main):001:0> require "dcf"
|
12
|
+
=> true
|
13
|
+
irb(main):002:0> Dcf.parse "Attr: Value"
|
14
|
+
=> [{"Attr"=>"Value"}]
|
15
|
+
|
16
|
+
The parser is currently quite slow, so if the files you're parsing allow it,
|
17
|
+
I'd recommend using this YAML-based parser instead: http://gist.github.com/117293
|
18
|
+
It's much faster, but it fails on certain valid files.
|
19
|
+
|
20
|
+
## Installation
|
21
|
+
|
22
|
+
sudo gem install treetop-dcf
|
23
|
+
|
24
|
+
## Acknowledgements
|
25
|
+
|
26
|
+
This project was created during Google Summer of Code 2009, as part of my project
|
27
|
+
for the [R Foundation for Statistical
|
28
|
+
Computing](http://www.r-project.org/foundation/main.html). (CRAN uses DCF for
|
29
|
+
package information, and we needed a parser written in Ruby.) Therefore, a big
|
30
|
+
thanks goes out to both the R Foundation and Google.
|
31
|
+
|
32
|
+
## Author
|
33
|
+
|
34
|
+
Bjørn Arild Mæland <bjorn.maeland at gmail.com>
|
data/lib/dcf.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
require "treetop"
|
3
|
+
require File.join(File.dirname(__FILE__) + "/dcf_grammar")
|
4
|
+
|
5
|
+
# Public entry point for parsing Debian Control File (DCF) text.
module Dcf
  # Parses DCF text into one hash per paragraph, mapping each field's
  # attribute text to its value text.
  #
  # @param [String] input
  # @return [Array, nil] An array of { attr => val } hashes, or nil when
  #   DcfParser#parse returns nil
  def self.parse(input)
    tree = DcfParser.new.parse(input)
    return if tree.nil?

    tree.elements.map do |entry|
      # Fold the rows of one paragraph into a single hash; a repeated
      # attribute keeps the value of its last occurrence.
      entry.paragraph.elements.inject({}) do |fields, row|
        field = row.field
        fields[field.attribute.text_value] = field.value.text_value
        fields
      end
    end
  end

  # @return [String] the version string of this library
  def self.version
    "0.2.0"
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Treetop grammar for Debian Control File (DCF) text.
grammar Dcf
  # One or more paragraphs, each optionally followed by a newline.
  rule paragraphs
    (paragraph eol?)+
  end

  # One or more fields, each optionally followed by a newline.
  rule paragraph
    (field eol?)+
  end

  # A single "attribute: value" entry.
  rule field
    attribute separator value
  end

  # Field name: every character up to the first colon or newline.
  rule attribute
    (!":" !eol .)+
  end

  # Field value. It may start on the line after the separator, and it may
  # continue over following lines that begin with at least one space or tab.
  rule value
    eol? (!eol .)+ (eol white+ (!eol .)+)*
    {
      def text_value
        # Fold continuation lines into one line. Continuation lines may be
        # indented with spaces or tabs (see the `white` rule), so both are
        # removed after each newline; stripping spaces only would leave
        # stray tabs in the folded value.
        super.strip.gsub(/\n[ \t]*/, ' ')
      end
    }
  end

  # Not referenced by any other rule in this grammar.
  rule next_record
    "\n" [a-zA-Z]
  end

  # Colon followed by optional horizontal whitespace.
  rule separator
    ':' white*
  end

  rule eol
    "\n"
  end

  # Horizontal whitespace: a space or a tab.
  rule white
    [ \t]
  end
end
|
data/test/helper.rb
ADDED
data/test/suite.rb
ADDED
data/test/test_dcf.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require File.dirname(__FILE__) + '/helper'
|
3
|
+
|
4
|
+
# Tests for Dcf.parse: each case feeds a DCF sample to the parser and checks
# the resulting paragraph hashes.
# NOTE(review): the `should "..." do` blocks presumably come from the test
# helper required by this file (shoulda-style) — confirm in test/helper.rb.
class TestDcfParser < Test::Unit::TestCase
|
5
|
+
|
6
|
+
# A complete package description; the first paragraph must expose 14 fields.
should "parse a description file" do
|
7
|
+
description = <<EOF
|
8
|
+
Package: RSQLite
|
9
|
+
Version: 0.7-1
|
10
|
+
Title: SQLite interface for R
|
11
|
+
Author: David A. James
|
12
|
+
Maintainer: Seth Falcon <seth@userprimary.net>
|
13
|
+
Description: Database Interface R driver for SQLite. This package
|
14
|
+
embeds the SQLite database engine in R and provides an
|
15
|
+
interface compliant with the DBI package. The source for the
|
16
|
+
SQLite engine (version 3.6.4) is included.
|
17
|
+
LazyLoad: yes
|
18
|
+
Depends: R (>= 2.6.0), methods, DBI (>= 0.2-3)
|
19
|
+
Imports: methods, DBI (>= 0.2-3)
|
20
|
+
License: LGPL (>= 2)
|
21
|
+
Collate: zzz.R S4R.R dbObjectId.R SQLite.R SQLiteSupport.R
|
22
|
+
Packaged: Sat Oct 25 18:54:05 2008; seth
|
23
|
+
Repository: CRAN
|
24
|
+
Date/Publication: 2008-10-26 18:29:06
|
25
|
+
EOF
|
26
|
+
sqlite = Dcf.parse(description).first
|
27
|
+
assert_equal 14, sqlite.keys.length
|
28
|
+
assert_equal "RSQLite", sqlite["Package"]
|
29
|
+
assert_equal "2008-10-26 18:29:06", sqlite["Date/Publication"]
|
30
|
+
assert_equal "zzz.R S4R.R dbObjectId.R SQLite.R SQLiteSupport.R", sqlite["Collate"]
|
31
|
+
end
|
32
|
+
|
33
|
+
# The Depends value begins on the line after the field name.
should "parse badly formatted description files" do
|
34
|
+
description = <<EOF
|
35
|
+
Package: StatFingerprints
|
36
|
+
Version: 1.3
|
37
|
+
Depends:
|
38
|
+
rgl,car,MASS,ffmanova,akima,labdsv,RColorBrewer,vegan,tcltk2,tcltk,maptools,sciplot
|
39
|
+
EOF
|
40
|
+
stat = Dcf.parse(description).first
|
41
|
+
assert_equal "rgl,car,MASS,ffmanova,akima,labdsv,RColorBrewer,vegan,tcltk2,tcltk,maptools,sciplot", stat["Depends"]
|
42
|
+
end
|
43
|
+
|
44
|
+
should "parse a simple entry" do
|
45
|
+
parse = Dcf.parse("Attr: Value\n").first
|
46
|
+
assert_equal ["Attr"], parse.keys
|
47
|
+
assert_equal "Value", parse["Attr"]
|
48
|
+
end
|
49
|
+
|
50
|
+
# Spaces between the value and the newline must not appear in the result.
should "ignore trailing whitespace" do
|
51
|
+
parse = Dcf.parse("Attr: Value \n").first
|
52
|
+
assert_equal "Value", parse["Attr"]
|
53
|
+
end
|
54
|
+
|
55
|
+
# A colon inside a value (here in Title) must be kept as part of the value.
should "parse values that contains colons" do
|
56
|
+
sample = <<EOF
|
57
|
+
Package: BCE
|
58
|
+
Version: 1.3
|
59
|
+
Title: Bayesian composition estimator: estimating sample (taxonomic)
|
60
|
+
composition from biomarker data
|
61
|
+
Author: Karel Van den Meersche <k.vdmeersche@nioo.knaw.nl>, Karline
|
62
|
+
Soetaert <k.soetaert@nioo.knaw.nl>
|
63
|
+
Maintainer: Karel Van den Meersche <k.vdmeersche@nioo.knaw.nl>
|
64
|
+
Depends: R (>= 2.01), limSolve
|
65
|
+
Description: Function to estimates taxonomic compositions from
|
66
|
+
biomarker data, using a Bayesian approach.
|
67
|
+
License: GPL
|
68
|
+
LazyData: yes
|
69
|
+
Repository: CRAN
|
70
|
+
Repository/R-Forge/Project: bce
|
71
|
+
Repository/R-Forge/Revision: 43
|
72
|
+
Date/Publication: 2009-06-03 20:45:44
|
73
|
+
Packaged: 2009-06-03 02:59:04 UTC; rforge
|
74
|
+
EOF
|
75
|
+
parse = Dcf.parse(sample).first
|
76
|
+
assert_equal "Bayesian composition estimator: estimating sample (taxonomic) " +
|
77
|
+
"composition from biomarker data", parse["Title"]
|
78
|
+
end
|
79
|
+
|
80
|
+
# Two paragraphs separated by an empty line yield two hashes.
should "parse multiple paragraphs" do
|
81
|
+
sample = <<EOF
|
82
|
+
Package: AIGIS
|
83
|
+
Version: 1.0
|
84
|
+
Depends: R (>= 2.5.1), gpclib (>= 1.4)
|
85
|
+
|
86
|
+
Package: AIS
|
87
|
+
Version: 1.0
|
88
|
+
Depends: R (>= 2.1.0)
|
89
|
+
Suggests: R2HTML
|
90
|
+
EOF
|
91
|
+
parse = Dcf.parse(sample)
|
92
|
+
assert_equal "AIGIS", parse[0]["Package"]
|
93
|
+
assert_equal "R (>= 2.5.1), gpclib (>= 1.4)", parse[0]["Depends"]
|
94
|
+
assert_equal "AIS", parse[1]["Package"]
|
95
|
+
end
|
96
|
+
|
97
|
+
# A value continued on the next line is expected back joined by one space.
should "parse multiline field values" do
|
98
|
+
sample = <<EOF
|
99
|
+
Package: ADaCGH
|
100
|
+
Version: 1.3-10
|
101
|
+
Depends: R (>= 2.2.1), cgh, tilingArray, aCGH, cghMCR, papply, GDD,
|
102
|
+
waveslim, cluster, snapCGH, Hmisc
|
103
|
+
Suggests: Rmpi, GLAD, DNAcopy
|
104
|
+
EOF
|
105
|
+
parse = Dcf.parse(sample).first
|
106
|
+
assert_equal "R (>= 2.2.1), cgh, tilingArray, aCGH, cghMCR, papply, GDD, " +
|
107
|
+
"waveslim, cluster, snapCGH, Hmisc",
|
108
|
+
parse["Depends"]
|
109
|
+
end
|
110
|
+
|
111
|
+
should "parse file with UTF-8 data" do
|
112
|
+
sample = <<EOF
|
113
|
+
Author: Vincent Goulet, Sébastien Auclair, Christophe Dutang, Xavier
|
114
|
+
Milhaud, Tommy Ouellet, Louis-Philippe Pouliot, Mathieu Pigeon
|
115
|
+
Encoding: latin1
|
116
|
+
Packaged: Tue May 26 10:40:31 2009; Grömping
|
117
|
+
Repository: CRAN
|
118
|
+
Date/Publication: 2009-05-26 09:23:28
|
119
|
+
EOF
|
120
|
+
# When RUBY_VERSION matches /1.9/, both strings are tagged as UTF-8
# explicitly; otherwise they are used unchanged.
sample = (RUBY_VERSION =~ /1.9/) ? sample.force_encoding("utf-8") : sample
|
121
|
+
match = (RUBY_VERSION =~ /1.9/) ? "Grömping".force_encoding("utf-8") : "Grömping"
|
122
|
+
parse = Dcf.parse(sample).first
|
123
|
+
assert_not_nil parse
|
124
|
+
assert_match "Sébastien Auclair", parse["Author"]
|
125
|
+
assert_equal "Tue May 26 10:40:31 2009; #{match}", parse["Packaged"]
|
126
|
+
end
|
127
|
+
|
128
|
+
end
|
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: treetop-dcf
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- "Bj\xC3\xB8rn Arild M\xC3\xA6land"
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-10-28 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: treetop
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.2.5
|
24
|
+
version:
|
25
|
+
description: Easy to use parser for Debian Control Files.
|
26
|
+
email: bjorn.maeland@gmail.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README.markdown
|
33
|
+
files:
|
34
|
+
- lib/dcf.rb
|
35
|
+
- lib/dcf_grammar.treetop
|
36
|
+
- README.markdown
|
37
|
+
has_rdoc: true
|
38
|
+
homepage: http://github.com/Chrononaut/treetop-dcf
|
39
|
+
licenses: []
|
40
|
+
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options:
|
43
|
+
- --charset=UTF-8
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: "0"
|
51
|
+
version:
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
version:
|
58
|
+
requirements: []
|
59
|
+
|
60
|
+
rubyforge_project:
|
61
|
+
rubygems_version: 1.3.5
|
62
|
+
signing_key:
|
63
|
+
specification_version: 3
|
64
|
+
summary: Easy to use parser for Debian Control Files.
|
65
|
+
test_files:
|
66
|
+
- test/helper.rb
|
67
|
+
- test/test_dcf.rb
|
68
|
+
- test/suite.rb
|