ddi-parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/ddi-parser.rb +161 -0
- data/lib/models/catalog.rb +10 -0
- data/lib/models/category.rb +11 -0
- data/lib/models/category_statistic.rb +11 -0
- data/lib/models/study.rb +11 -0
- data/lib/models/study_date.rb +11 -0
- data/lib/models/summary_stat.rb +11 -0
- data/lib/models/variable.rb +11 -0
- data/lib/models/version.rb +5 -0
- metadata +91 -0
data/lib/ddi-parser.rb
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'libxml'
|
3
|
+
require 'models/catalog'
|
4
|
+
require 'models/category'
|
5
|
+
require 'models/category_statistic'
|
6
|
+
require 'models/study'
|
7
|
+
require 'models/study_date'
|
8
|
+
require 'models/variable'
|
9
|
+
require 'models/summary_stat'
|
10
|
+
|
11
|
+
module DDI
  # Reads a DDI codebook XML file with libxml-ruby and turns it into
  # DDI::Study / DDI::Variable model objects.
  class Parser

    # Given a DDI metadata file, parse it and return the study information.
    #
    # ddi_file - path of the DDI XML file; passed to LibXML::XML::Parser.file.
    #
    # Returns a DDI::Study with abstract, title, id, dates,
    # sampling_procedure and variables filled in. Elements or attributes that
    # are absent from the document leave the matching field nil (the abstract
    # is an empty string) instead of raising NoMethodError.
    def parse(ddi_file)
      doc = LibXML::XML::Parser.file(ddi_file).parse
      study = DDI::Study.new

      # NOTE: every path below starts with '//', so it is matched against the
      # whole document, not only the stdyDscr subtree. Querying the document
      # directly keeps that matching and also works when stdyDscr is missing.
      study.abstract = doc.find('//abstract').map { |node| node.content.strip }.join.strip
      study.title = text_of(doc.find('//stdyDscr/citation/titlStmt/titl')[0])
      study.id = text_of(doc.find('//IDNo')[0])

      # start and finish dates for study
      study.dates = doc.find('//sumDscr/collDate').map { |node| build_study_date(node) }
      study.sampling_procedure = text_of(doc.find('//sampProc')[0])
      study.variables = get_variable_information(doc)
      study
    end

    private

    # Information about the variables.
    #
    # doc - the parsed document (anything responding to find(xpath)).
    #
    # Returns an Array of DDI::Variable, one per //dataDscr/var element, in
    # document order. Empty when the document has no such elements.
    def get_variable_information(doc)
      groups = variable_groups(doc)
      doc.find('//dataDscr/var').map { |var| build_variable(var, groups) }
    end

    # Builds a DDI::StudyDate from the 'event' and 'date' attributes of a
    # collDate element.
    def build_study_date(coll_date)
      study_date = DDI::StudyDate.new
      study_date.type = attribute_value(coll_date, 'event')
      study_date.date = attribute_value(coll_date, 'date')
      study_date
    end

    # Collects the variable groups as a Hash of group label => Array of the
    # variable ids listed in the group's 'var' attribute. Groups without a
    # 'var' attribute or without a label are skipped.
    def variable_groups(doc)
      groups = {}
      doc.find('//dataDscr/varGrp').each do |var_group|
        members = var_group.attributes.get_attribute('var')
        label = text_of(var_group.find('./labl')[0])
        next if members.nil? || label.nil?
        groups[label] = members.value.split(' ')
      end
      groups
    end

    # Builds one DDI::Variable from a var element.
    #
    # var    - the var element node.
    # groups - Hash produced by variable_groups, used to find the group label.
    def build_variable(var, groups)
      variable = DDI::Variable.new
      variable.id = attribute_value(var, 'ID')
      variable.name = attribute_value(var, 'name')
      variable.file = attribute_value(var, 'files')
      variable.interval = attribute_value(var, 'intrvl')

      label = var.find('./labl')[0]
      variable.label = label.content.strip unless label.nil?

      # Ask for the range element explicitly: the first child of valrng is a
      # whitespace text node whenever the XML is pretty-printed.
      range = var.find('./valrng/range')[0]
      unless range.nil?
        variable.max = attribute_value(range, 'max')
        variable.min = attribute_value(range, 'min')
      end

      question = var.find('./qstn')[0]
      unless question.nil?
        variable.question = text_of(question.find('./qstnLit')[0])
        variable.interview_instruction = text_of(question.find('./ivuInstr')[0])
      end

      variable.summary_stats = var.find('./sumStat').map { |stat| build_summary_stat(stat) }
      # categories in ddi are value domains in mb
      variable.categories = var.find('./catgry').map { |cat| build_category(cat) }

      # what group is the variable in: the first group listing its id
      match = groups.detect { |_label, ids| ids.include?(variable.id) }
      variable.group = match.first unless match.nil?
      variable
    end

    # Builds a DDI::SummaryStat from a sumStat element ('type' attribute plus
    # the element text).
    def build_summary_stat(stat)
      statistic = DDI::SummaryStat.new
      statistic.type = attribute_value(stat, 'type')
      statistic.value = text_of(stat)
      statistic
    end

    # Builds a DDI::Category from a catgry element. A missing catValu or labl
    # element yields the placeholder 'N/A'; a present but empty one yields nil.
    def build_category(cat)
      category = DDI::Category.new
      category.value = text_or_placeholder(cat.find('./catValu')[0])
      category.label = text_or_placeholder(cat.find('./labl')[0])

      statistics = []
      cat.find('./catStat').each do |cat_stat|
        type = attribute_value(cat_stat, 'type')
        next if type.nil? # statistics without a type are ignored

        statistic = DDI::CategoryStatistic.new
        statistic.type = type
        statistic.value = text_of(cat_stat)
        statistics.push(statistic)
      end
      category.category_statistics = statistics
      category
    end

    # Returns 'N/A' when the element is missing, otherwise its text.
    def text_or_placeholder(node)
      node.nil? ? 'N/A' : text_of(node)
    end

    # Returns the stripped text content of an element, or nil when the element
    # is missing or has no child nodes. The whole content is used rather than
    # only the first child node, so inline markup does not truncate the text.
    def text_of(node)
      return nil if node.nil? || node.first.nil?
      node.content.strip
    end

    # Returns the stripped value of the named attribute, or nil when the node
    # does not carry it.
    def attribute_value(node, name)
      attributes = node.attributes
      attribute = attributes.get_attribute(name) unless attributes.nil?
      attribute.value.strip unless attribute.nil?
    end

  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
module DDI
  # A catalog object; can contain 1 or more studies (ie datasets).
  #
  # Plain data holder: every attribute is readable and writable and is nil
  # until assigned.
  # NOTE(review): nesstar_id / nesstar_uri presumably identify the catalog on
  # a Nesstar server - confirm against the code that fills them in.
  class Catalog

    attr_accessor :studies, :label, :description, :nesstar_id, :nesstar_uri

  end
end
|
data/lib/models/study.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
module DDI

  # Contains a set of variables and belongs to a catalog.
  #
  # Plain data holder: every attribute is readable and writable and is nil
  # until assigned.
  class Study

    attr_accessor :variables, :abstract, :title, :id, :dates, :sampling_procedure,
                  :weight, :nesstar_id, :nesstar_uri

  end

end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module DDI

  # Information about a variable/column in a dataset.
  #
  # Plain data holder: every attribute is readable and writable and is nil
  # until assigned.
  class Variable

    attr_accessor :name, :label, :group, :id, :file, :interval, :max, :min,
                  :question, :interview_instruction, :summary_stats, :categories

  end

end
|
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ddi-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Ian Dunlop
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-05-06 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: libxml-ruby
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - "="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 27
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 1
|
33
|
+
- 4
|
34
|
+
version: 1.1.4
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
description: This gem parses ddi metadata files
|
38
|
+
email:
|
39
|
+
- ian.dunlop@manchester.ac.uk
|
40
|
+
executables: []
|
41
|
+
|
42
|
+
extensions: []
|
43
|
+
|
44
|
+
extra_rdoc_files: []
|
45
|
+
|
46
|
+
files:
|
47
|
+
- lib/ddi-parser.rb
|
48
|
+
- lib/models/catalog.rb
|
49
|
+
- lib/models/category.rb
|
50
|
+
- lib/models/category_statistic.rb
|
51
|
+
- lib/models/study.rb
|
52
|
+
- lib/models/study_date.rb
|
53
|
+
- lib/models/summary_stat.rb
|
54
|
+
- lib/models/variable.rb
|
55
|
+
- lib/models/version.rb
|
56
|
+
has_rdoc: true
|
57
|
+
homepage: http://github.com/mygrid/ddi-parser
|
58
|
+
licenses: []
|
59
|
+
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
hash: 3
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
83
|
+
requirements: []
|
84
|
+
|
85
|
+
rubyforge_project: ddi-parser
|
86
|
+
rubygems_version: 1.3.7
|
87
|
+
signing_key:
|
88
|
+
specification_version: 3
|
89
|
+
summary: API for parsing ddi metadata files and returning results
|
90
|
+
test_files: []
|
91
|
+
|