treetop-dcf 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,34 @@
1
+ # treetop-dcf
2
+
3
+ Easy to use parser for
4
+ [Debian Control
5
+ Files](http://www.debian.org/doc/debian-policy/ch-controlfields.html).
6
+ The format is very similar to YAML, but with some subtle differences (e.g.
7
+ YAML requires a space after field names, DCF does not).
8
+
9
+ Parsed paragraphs are returned as hashes in an array. Example:
10
+
11
+ irb(main):001:0> require "dcf"
12
+ => true
13
+ irb(main):002:0> Dcf.parse "Attr: Value"
14
+ => [{"Attr"=>"Value"}]
15
+
16
+ The parser is currently quite slow, so if the files you're parsing allow it
17
+ I'd recommend using this YAML-based parser instead: http://gist.github.com/117293
18
+ It's much faster but fails on certain valid files.
19
+
20
+ ## Installation
21
+
22
+ sudo gem install treetop-dcf
23
+
24
+ ## Acknowledgements
25
+
26
+ This project was created during Google Summer of Code 2009, as part of my project
27
+ for the [R Foundation for Statistical
28
+ Computing](http://www.r-project.org/foundation/main.html). (CRAN uses DCF for
29
+ package information, and we needed a parser written in Ruby.) Therefore, a big
30
+ thanks goes out to both the R Foundation and Google.
31
+
32
+ ## Author
33
+
34
+ Bjørn Arild Mæland <bjorn.maeland at gmail.com>
@@ -0,0 +1,24 @@
1
+ require "rubygems"
2
+ require "treetop"
3
+ require File.join(File.dirname(__FILE__) + "/dcf_grammar")
4
+
5
# Front end for parsing Debian Control Files (DCF).
#
# Relies on a DcfParser class being loaded before .parse is called
# (presumably generated by Treetop from the `Dcf` grammar in dcf_grammar --
# confirm against the require lines of this file).
module Dcf
  # Library version, exposed as a constant in addition to Dcf.version.
  VERSION = "0.2.0".freeze

  # Parses DCF text into one hash per paragraph.
  #
  # @param [String] input
  # @return [Array, nil] An array of { attr => val } hashes or nil if failure
  def self.parse(input)
    tree = DcfParser.new.parse(input)
    return if tree.nil?

    tree.elements.map do |entry|
      # Rows are applied in order, so a field name that occurs twice in the
      # same paragraph keeps the value of its last occurrence.
      entry.paragraph.elements.inject({}) do |fields, row|
        field = row.field
        fields[field.attribute.text_value] = field.value.text_value
        fields
      end
    end
  end

  # @return [String] the library version; a fresh copy on every call, so
  #   callers may mutate the result without affecting VERSION
  def self.version
    VERSION.dup
  end
end
@@ -0,0 +1,43 @@
1
# Treetop grammar for Debian Control Files (DCF).
grammar Dcf
  # Top-level rule: one or more paragraphs, each optionally followed by a
  # newline (the blank line between paragraphs).
  rule paragraphs
    (paragraph eol?)+
  end

  # A paragraph is one or more fields, each optionally followed by a newline.
  rule paragraph
    (field eol?)+
  end

  # A single "Attribute: value" entry.
  rule field
    attribute separator value
  end

  # Field name: every character up to the first ':' or newline.
  rule attribute
    (!":" !eol .)+
  end

  # Field value. It may start on the line after the separator (leading
  # optional eol) and may continue over following lines, provided each
  # continuation line begins with at least one space or tab.
  rule value
    eol? (!eol .)+ (eol white+ (!eol .)+)*
    {
      # Returns the value with surrounding whitespace removed and every
      # line break (plus the indentation after it) folded into one space.
      def text_value
        # Continuation lines may be indented with spaces or tabs (see the
        # `white` rule), so both are consumed here; matching spaces only
        # would leave a stray tab inside the folded value.
        super.strip.gsub(/\n[ \t]*/, ' ')
      end
    }
  end

  # NOTE(review): not referenced by any other rule in this grammar; kept so
  # the generated parser's interface does not change.
  rule next_record
    "\n" [a-zA-Z]
  end

  # Colon after the attribute name, plus any spaces or tabs that follow it.
  rule separator
    ':' white*
  end

  rule eol
    "\n"
  end

  rule white
    [ \t]
  end
end
@@ -0,0 +1,6 @@
1
+ require 'rubygems'
2
+
3
+ require File.join(File.dirname(__FILE__), *%w[.. lib dcf])
4
+
5
+ require 'test/unit'
6
+ require 'shoulda'
@@ -0,0 +1,9 @@
1
+ require 'test/unit'
2
+
3
# Loads every test_*.rb file located in the same directory as this file.
#
# for some reason these tests fail when run via TextMate
# but succeed when run on the command line.
#
# Paths are expanded to absolute form before being required: a relative
# path without a leading "./" is looked up in $LOAD_PATH, which does not
# contain the current directory on Ruby 1.9.2 and later. Sorting keeps the
# load order independent of the order in which the filesystem lists files.
test_files = Dir[File.join(File.dirname(__FILE__), "test_*.rb")].sort
test_files.each do |file|
  require File.expand_path(file)
end
@@ -0,0 +1,128 @@
1
+ # -*- coding: utf-8 -*-
2
+ require File.dirname(__FILE__) + '/helper'
3
+
4
# Exercises Dcf.parse against sample DESCRIPTION-style control files.
#
# Continuation lines inside the fixtures are indented: the grammar only
# treats a line as part of the previous field's value when it starts with
# at least one space or tab.
class TestDcfParser < Test::Unit::TestCase

  should "parse a description file" do
    description = <<EOF
Package: RSQLite
Version: 0.7-1
Title: SQLite interface for R
Author: David A. James
Maintainer: Seth Falcon <seth@userprimary.net>
Description: Database Interface R driver for SQLite. This package
  embeds the SQLite database engine in R and provides an
  interface compliant with the DBI package. The source for the
  SQLite engine (version 3.6.4) is included.
LazyLoad: yes
Depends: R (>= 2.6.0), methods, DBI (>= 0.2-3)
Imports: methods, DBI (>= 0.2-3)
License: LGPL (>= 2)
Collate: zzz.R S4R.R dbObjectId.R SQLite.R SQLiteSupport.R
Packaged: Sat Oct 25 18:54:05 2008; seth
Repository: CRAN
Date/Publication: 2008-10-26 18:29:06
EOF
    sqlite = Dcf.parse(description).first
    assert_equal 14, sqlite.keys.length
    assert_equal "RSQLite", sqlite["Package"]
    assert_equal "2008-10-26 18:29:06", sqlite["Date/Publication"]
    assert_equal "zzz.R S4R.R dbObjectId.R SQLite.R SQLiteSupport.R", sqlite["Collate"]
  end

  should "parse badly formatted description files" do
    # The value starts on the line after the field name.
    description = <<EOF
Package: StatFingerprints
Version: 1.3
Depends:
  rgl,car,MASS,ffmanova,akima,labdsv,RColorBrewer,vegan,tcltk2,tcltk,maptools,sciplot
EOF
    stat = Dcf.parse(description).first
    assert_equal "rgl,car,MASS,ffmanova,akima,labdsv,RColorBrewer,vegan,tcltk2,tcltk,maptools,sciplot", stat["Depends"]
  end

  should "parse a simple entry" do
    parse = Dcf.parse("Attr: Value\n").first
    assert_equal ["Attr"], parse.keys
    assert_equal "Value", parse["Attr"]
  end

  should "ignore trailing whitespace" do
    parse = Dcf.parse("Attr: Value \n").first
    assert_equal "Value", parse["Attr"]
  end

  should "parse values that contains colons" do
    sample = <<EOF
Package: BCE
Version: 1.3
Title: Bayesian composition estimator: estimating sample (taxonomic)
  composition from biomarker data
Author: Karel Van den Meersche <k.vdmeersche@nioo.knaw.nl>, Karline
  Soetaert <k.soetaert@nioo.knaw.nl>
Maintainer: Karel Van den Meersche <k.vdmeersche@nioo.knaw.nl>
Depends: R (>= 2.01), limSolve
Description: Function to estimates taxonomic compositions from
  biomarker data, using a Bayesian approach.
License: GPL
LazyData: yes
Repository: CRAN
Repository/R-Forge/Project: bce
Repository/R-Forge/Revision: 43
Date/Publication: 2009-06-03 20:45:44
Packaged: 2009-06-03 02:59:04 UTC; rforge
EOF
    parse = Dcf.parse(sample).first
    assert_equal "Bayesian composition estimator: estimating sample (taxonomic) " +
                 "composition from biomarker data", parse["Title"]
  end

  should "parse multiple paragraphs" do
    # Paragraphs are separated by a blank line.
    sample = <<EOF
Package: AIGIS
Version: 1.0
Depends: R (>= 2.5.1), gpclib (>= 1.4)

Package: AIS
Version: 1.0
Depends: R (>= 2.1.0)
Suggests: R2HTML
EOF
    parse = Dcf.parse(sample)
    assert_equal "AIGIS", parse[0]["Package"]
    assert_equal "R (>= 2.5.1), gpclib (>= 1.4)", parse[0]["Depends"]
    assert_equal "AIS", parse[1]["Package"]
  end

  should "parse multiline field values" do
    sample = <<EOF
Package: ADaCGH
Version: 1.3-10
Depends: R (>= 2.2.1), cgh, tilingArray, aCGH, cghMCR, papply, GDD,
  waveslim, cluster, snapCGH, Hmisc
Suggests: Rmpi, GLAD, DNAcopy
EOF
    parse = Dcf.parse(sample).first
    assert_equal "R (>= 2.2.1), cgh, tilingArray, aCGH, cghMCR, papply, GDD, " +
                 "waveslim, cluster, snapCGH, Hmisc",
                 parse["Depends"]
  end

  should "parse file with UTF-8 data" do
    sample = <<EOF
Author: Vincent Goulet, Sébastien Auclair, Christophe Dutang, Xavier
  Milhaud, Tommy Ouellet, Louis-Philippe Pouliot, Mathieu Pigeon
Encoding: latin1
Packaged: Tue May 26 10:40:31 2009; Grömping
Repository: CRAN
Date/Publication: 2009-05-26 09:23:28
EOF
    # Tag the strings as UTF-8 on every Ruby that has String#force_encoding,
    # rather than pattern-matching one specific RUBY_VERSION. Work on a copy
    # so the literals themselves are never mutated.
    as_utf8 = lambda do |str|
      str.respond_to?(:force_encoding) ? str.dup.force_encoding("utf-8") : str
    end
    sample = as_utf8.call(sample)
    match = as_utf8.call("Grömping")
    parse = Dcf.parse(sample).first
    assert_not_nil parse
    assert_match "Sébastien Auclair", parse["Author"]
    assert_equal "Tue May 26 10:40:31 2009; #{match}", parse["Packaged"]
  end

end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: treetop-dcf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - "Bj\xC3\xB8rn Arild M\xC3\xA6land"
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-28 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: treetop
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.5
24
+ version:
25
+ description: Easy to use parser for Debian Control Files.
26
+ email: bjorn.maeland@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.markdown
33
+ files:
34
+ - lib/dcf.rb
35
+ - lib/dcf_grammar.treetop
36
+ - README.markdown
37
+ has_rdoc: true
38
+ homepage: http://github.com/Chrononaut/treetop-dcf
39
+ licenses: []
40
+
41
+ post_install_message:
42
+ rdoc_options:
43
+ - --charset=UTF-8
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ version:
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ version:
58
+ requirements: []
59
+
60
+ rubyforge_project:
61
+ rubygems_version: 1.3.5
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: Easy to use parser for Debian Control Files.
65
+ test_files:
66
+ - test/helper.rb
67
+ - test/test_dcf.rb
68
+ - test/suite.rb