treetop-dcf 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ # treetop-dcf
2
+
3
+ Easy to use parser for
4
+ [Debian Control
5
+ Files](http://www.debian.org/doc/debian-policy/ch-controlfields.html).
6
+ The format is very similar to YAML, but with some subtle differences (e.g.
7
+ YAML requires a space after field names, DCF does not).
8
+
9
+ Parsed paragraphs are returned as hashes in an array. Example:
10
+
11
+ irb(main):001:0> require "dcf"
12
+ => true
13
+ irb(main):002:0> Dcf.parse "Attr: Value"
14
+ => [{"Attr"=>"Value"}]
15
+
16
+ The parser is currently quite slow, so if the files you're parsing allow it
17
+ I'd recommend using this YAML-based parser instead: http://gist.github.com/117293
18
+ It's much faster, but it fails on certain valid files.
19
+
20
+ ## Installation
21
+
22
+ sudo gem install treetop-dcf
23
+
24
+ ## Acknowledgements
25
+
26
+ This project was created during Google Summer of Code 2009, as part of my project
27
+ for the [R Foundation for Statistical
28
+ Computing](http://www.r-project.org/foundation/main.html). (CRAN uses DCF for
29
+ package information, and we needed a parser written in Ruby.) Therefore, a big
30
+ thanks goes out to both the R Foundation and Google.
31
+
32
+ ## Author
33
+
34
+ Bjørn Arild Mæland <bjorn.maeland at gmail.com>
@@ -0,0 +1,24 @@
1
+ require "rubygems"
2
+ require "treetop"
3
+ require File.join(File.dirname(__FILE__) + "/dcf_grammar")
4
+
5
# Entry point for parsing Debian Control File (DCF) text.
module Dcf
  # Parses DCF text into one hash per paragraph.
  #
  # @param [String, nil] input the raw DCF text
  # @return [Array, nil] An array of { attr => val } hashes, or nil if the
  #   input is nil or the parser rejects it
  def self.parse(input)
    # nil is reported like any other failure instead of being handed to the
    # parser, which is only documented to take a String.
    return if input.nil?

    tree = DcfParser.new.parse(input)
    return if tree.nil?

    tree.elements.map do |entry|
      pairs = entry.paragraph.elements.map do |row|
        [row.field.attribute.text_value, row.field.value.text_value]
      end
      # When an attribute is repeated within a paragraph the last value wins.
      Hash[pairs]
    end
  end

  # @return [String] the version string of this library
  def self.version
    "0.2.0"
  end
end
@@ -0,0 +1,43 @@
1
grammar Dcf
  # One or more paragraphs, each optionally followed by a newline.
  rule paragraphs
    (paragraph eol?)+
  end

  # One or more fields, each optionally followed by a newline.
  rule paragraph
    (field eol?)+
  end

  # A single "attribute: value" entry.
  rule field
    attribute separator value
  end

  # One or more characters that are neither a colon nor a newline.
  rule attribute
    (!":" !eol .)+
  end

  # An optional leading newline, a first line of text, then any number of
  # continuation lines. A continuation line is a newline followed by at
  # least one space or tab and some text.
  rule value
    eol? (!eol .)+ (eol white+ (!eol .)+)*
    {
      # Returns the value on a single line: surrounding whitespace is
      # stripped and every line break, together with the indentation that
      # follows it, is folded into one space.
      def text_value
        # Continuation lines may be indented with spaces or tabs (see the
        # white rule), so both are folded here.
        super.strip.gsub(/\n[ \t]*/, ' ')
      end
    }
  end

  # A newline followed by an ASCII letter.
  # NOTE(review): no other rule in this grammar references next_record.
  rule next_record
    "\n" [a-zA-Z]
  end

  # The colon after an attribute, plus any spaces or tabs that follow it.
  rule separator
    ':' white*
  end

  # A single newline character.
  rule eol
    "\n"
  end

  # A single space or tab.
  rule white
    [ \t]
  end
end
@@ -0,0 +1,6 @@
1
+ require 'rubygems'
2
+
3
+ require File.join(File.dirname(__FILE__), *%w[.. lib dcf])
4
+
5
+ require 'test/unit'
6
+ require 'shoulda'
@@ -0,0 +1,9 @@
1
+ require 'test/unit'
2
+
3
+ # for some reason these tests fail when run via TextMate
4
+ # but succeed when run on the command line.
5
+
6
# Require every test_*.rb file that lives in the same directory as this suite.
Dir["#{File.dirname(__FILE__)}/test_*.rb"].each { |test_file| require test_file }
@@ -0,0 +1,128 @@
1
+ # -*- coding: utf-8 -*-
2
+ require File.dirname(__FILE__) + '/helper'
3
+
4
# Exercises Dcf.parse against sample DCF documents.
#
# The heredocs below are whitespace-sensitive: a continuation line of a
# multi-line value must begin with at least one space or tab, otherwise the
# grammar does not treat it as part of the preceding field.
class TestDcfParser < Test::Unit::TestCase

  should "parse a description file" do
    description = <<EOF
Package: RSQLite
Version: 0.7-1
Title: SQLite interface for R
Author: David A. James
Maintainer: Seth Falcon <seth@userprimary.net>
Description: Database Interface R driver for SQLite. This package
        embeds the SQLite database engine in R and provides an
        interface compliant with the DBI package. The source for the
        SQLite engine (version 3.6.4) is included.
LazyLoad: yes
Depends: R (>= 2.6.0), methods, DBI (>= 0.2-3)
Imports: methods, DBI (>= 0.2-3)
License: LGPL (>= 2)
Collate: zzz.R S4R.R dbObjectId.R SQLite.R SQLiteSupport.R
Packaged: Sat Oct 25 18:54:05 2008; seth
Repository: CRAN
Date/Publication: 2008-10-26 18:29:06
EOF
    sqlite = Dcf.parse(description).first
    assert_equal 14, sqlite.keys.length
    assert_equal "RSQLite", sqlite["Package"]
    assert_equal "2008-10-26 18:29:06", sqlite["Date/Publication"]
    assert_equal "zzz.R S4R.R dbObjectId.R SQLite.R SQLiteSupport.R", sqlite["Collate"]
  end

  # The value starts on the line after the attribute.
  should "parse badly formatted description files" do
    description = <<EOF
Package: StatFingerprints
Version: 1.3
Depends:
        rgl,car,MASS,ffmanova,akima,labdsv,RColorBrewer,vegan,tcltk2,tcltk,maptools,sciplot
EOF
    stat = Dcf.parse(description).first
    assert_equal "rgl,car,MASS,ffmanova,akima,labdsv,RColorBrewer,vegan,tcltk2,tcltk,maptools,sciplot", stat["Depends"]
  end

  should "parse a simple entry" do
    parse = Dcf.parse("Attr: Value\n").first
    assert_equal ["Attr"], parse.keys
    assert_equal "Value", parse["Attr"]
  end

  should "ignore trailing whitespace" do
    parse = Dcf.parse("Attr: Value \n").first
    assert_equal "Value", parse["Attr"]
  end

  should "parse values that contains colons" do
    sample = <<EOF
Package: BCE
Version: 1.3
Title: Bayesian composition estimator: estimating sample (taxonomic)
        composition from biomarker data
Author: Karel Van den Meersche <k.vdmeersche@nioo.knaw.nl>, Karline
        Soetaert <k.soetaert@nioo.knaw.nl>
Maintainer: Karel Van den Meersche <k.vdmeersche@nioo.knaw.nl>
Depends: R (>= 2.01), limSolve
Description: Function to estimates taxonomic compositions from
        biomarker data, using a Bayesian approach.
License: GPL
LazyData: yes
Repository: CRAN
Repository/R-Forge/Project: bce
Repository/R-Forge/Revision: 43
Date/Publication: 2009-06-03 20:45:44
Packaged: 2009-06-03 02:59:04 UTC; rforge
EOF
    parse = Dcf.parse(sample).first
    assert_equal "Bayesian composition estimator: estimating sample (taxonomic) " +
      "composition from biomarker data", parse["Title"]
  end

  # Paragraphs are separated by a single blank line.
  should "parse multiple paragraphs" do
    sample = <<EOF
Package: AIGIS
Version: 1.0
Depends: R (>= 2.5.1), gpclib (>= 1.4)

Package: AIS
Version: 1.0
Depends: R (>= 2.1.0)
Suggests: R2HTML
EOF
    parse = Dcf.parse(sample)
    assert_equal "AIGIS", parse[0]["Package"]
    assert_equal "R (>= 2.5.1), gpclib (>= 1.4)", parse[0]["Depends"]
    assert_equal "AIS", parse[1]["Package"]
  end

  should "parse multiline field values" do
    sample = <<EOF
Package: ADaCGH
Version: 1.3-10
Depends: R (>= 2.2.1), cgh, tilingArray, aCGH, cghMCR, papply, GDD,
        waveslim, cluster, snapCGH, Hmisc
Suggests: Rmpi, GLAD, DNAcopy
EOF
    parse = Dcf.parse(sample).first
    assert_equal "R (>= 2.2.1), cgh, tilingArray, aCGH, cghMCR, papply, GDD, " +
      "waveslim, cluster, snapCGH, Hmisc",
      parse["Depends"]
  end

  should "parse file with UTF-8 data" do
    sample = <<EOF
Author: Vincent Goulet, Sébastien Auclair, Christophe Dutang, Xavier
        Milhaud, Tommy Ouellet, Louis-Philippe Pouliot, Mathieu Pigeon
Encoding: latin1
Packaged: Tue May 26 10:40:31 2009; Grömping
Repository: CRAN
Date/Publication: 2009-05-26 09:23:28
EOF
    # Anchored and escaped so that only a real 1.9.x interpreter matches;
    # the bare /1.9/ pattern would also match strings such as "2.1.9".
    ruby_19 = RUBY_VERSION =~ /\A1\.9/
    sample = ruby_19 ? sample.force_encoding("utf-8") : sample
    match = ruby_19 ? "Grömping".force_encoding("utf-8") : "Grömping"
    parse = Dcf.parse(sample).first
    assert_not_nil parse
    assert_match "Sébastien Auclair", parse["Author"]
    assert_equal "Tue May 26 10:40:31 2009; #{match}", parse["Packaged"]
  end

end
metadata ADDED
@@ -0,0 +1,68 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: treetop-dcf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - "Bj\xC3\xB8rn Arild M\xC3\xA6land"
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-28 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: treetop
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.5
24
+ version:
25
+ description: Easy to use parser for Debian Control Files.
26
+ email: bjorn.maeland@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.markdown
33
+ files:
34
+ - lib/dcf.rb
35
+ - lib/dcf_grammar.treetop
36
+ - README.markdown
37
+ has_rdoc: true
38
+ homepage: http://github.com/Chrononaut/treetop-dcf
39
+ licenses: []
40
+
41
+ post_install_message:
42
+ rdoc_options:
43
+ - --charset=UTF-8
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ version:
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ version:
58
+ requirements: []
59
+
60
+ rubyforge_project:
61
+ rubygems_version: 1.3.5
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: Easy to use parser for Debian Control Files.
65
+ test_files:
66
+ - test/helper.rb
67
+ - test/test_dcf.rb
68
+ - test/suite.rb