ddi-parser 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/ddi-parser.rb +34 -14
- data/lib/models/version.rb +1 -1
- metadata +53 -27
data/lib/ddi-parser.rb
CHANGED
@@ -8,19 +8,30 @@ require 'models/study'
|
|
8
8
|
require 'models/study_date'
|
9
9
|
require 'models/ddi_variable'
|
10
10
|
require 'models/summary_stat'
|
11
|
-
|
11
|
+
require 'logger'
|
12
12
|
module DDI
|
13
13
|
class Parser
|
14
14
|
|
15
|
+
attr_accessor :logger
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@logger = Logger.new('ddi-parser.log')
|
19
|
+
end
|
20
|
+
|
15
21
|
#Given a DDI metadata file, parse it and return study information
|
16
22
|
#
|
17
23
|
#Returns a DDI::Study object
|
18
24
|
def parse ddi_file
|
25
|
+
@logger.info 'Parsing DDI file ' + ddi_file
|
19
26
|
catalog = DDI::Catalog.new
|
20
27
|
study = DDI::Study.new
|
21
28
|
study_info_hash = Hash.new
|
22
29
|
#TODO This will not work on Windows because it depends on the Unix `file` tool; a different approach is needed. Possibly use rchardet instead
|
23
|
-
|
30
|
+
begin
|
31
|
+
encode_type = `file --mime -br #{ddi_file}`.gsub(/\n/,"").split(';')[1].split('=')[1]
|
32
|
+
rescue Exception => e
|
33
|
+
|
34
|
+
end
|
24
35
|
#have to convert to UTF-8 for libxml
|
25
36
|
contents = File.open(ddi_file).read
|
26
37
|
output = Iconv.conv("UTF-8", encode_type, contents)
|
@@ -33,13 +44,11 @@ module DDI
|
|
33
44
|
studynodes = doc.find('//stdyDscr')
|
34
45
|
abstracts = studynodes[0].find('//abstract')
|
35
46
|
abstract = ""
|
36
|
-
abstracts.each
|
37
|
-
abstract << ab.content.strip
|
38
|
-
end
|
47
|
+
abstracts.each {|ab| abstract << ab.content.strip}
|
39
48
|
abstract.strip!
|
40
49
|
study.abstract = abstract
|
41
|
-
study.title = studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0].first.content.strip
|
42
|
-
study.id = studynodes[0].find('//IDNo')[0].first.content.strip
|
50
|
+
study.title = studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0].first.content.strip unless studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0] == nil
|
51
|
+
study.id = studynodes[0].find('//IDNo')[0].first.content.strip unless studynodes[0].find('//IDNo')[0] == nil
|
43
52
|
|
44
53
|
#start and finish dates for study
|
45
54
|
dates = []
|
@@ -54,7 +63,10 @@ module DDI
|
|
54
63
|
end
|
55
64
|
end
|
56
65
|
study.dates = dates
|
57
|
-
|
66
|
+
studynodes[0].find('//sampProc')[0] ? samp_node = studynodes[0].find('//sampProc')[0] : samp_node = nil
|
67
|
+
unless samp_node == nil
|
68
|
+
study.sampling_procedure = samp_node.first.content.strip unless samp_node.first == nil
|
69
|
+
end
|
58
70
|
# study.weight = studynodes[0].find('//sampProc')[0].first.content
|
59
71
|
study.ddi_variables = get_variable_information doc
|
60
72
|
return study
|
@@ -74,7 +86,7 @@ module DDI
|
|
74
86
|
groups = a.get_attribute('var')
|
75
87
|
if groups != nil
|
76
88
|
groups = a.get_attribute('var')
|
77
|
-
variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ')
|
89
|
+
variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ') unless vargroup.find('./labl')[0] == nil
|
78
90
|
# else
|
79
91
|
# variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ')
|
80
92
|
end
|
@@ -85,17 +97,25 @@ module DDI
|
|
85
97
|
var_attr = var.attributes
|
86
98
|
variable.id = var_attr.get_attribute('ID').value.strip unless var_attr.get_attribute('ID') == nil
|
87
99
|
variable.name = var_attr.get_attribute('name').value.strip unless var_attr.get_attribute('name') == nil
|
100
|
+
#if there is no name then it has no meaning or context
|
101
|
+
next if variable.name == nil
|
88
102
|
variable.file = var_attr.get_attribute('files').value.strip unless var_attr.get_attribute('files') == nil
|
89
103
|
variable.interval = var_attr.get_attribute('intrvl').value.strip unless var_attr.get_attribute('intrvl') == nil
|
90
104
|
variable.label = var.find('./labl')[0].content.strip unless var.find('./labl')[0] == nil
|
105
|
+
#these things never seem consistent with the schema, might be an inner element, might be an attribute!
|
106
|
+
if var.find('./labl')[0] == nil
|
107
|
+
variable.label = var_attr.get_attribute('labl').value.strip unless var_attr.get_attribute('labl') == nil
|
108
|
+
end
|
91
109
|
rng = var.find('./valrng')
|
92
110
|
if rng != nil
|
93
111
|
if rng[0] != nil
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
112
|
+
unless rng[0].first == nil
|
113
|
+
range_attr = rng[0].first.attributes
|
114
|
+
max_val = range_attr.get_attribute('max')
|
115
|
+
variable.max = max_val.value.strip unless max_val == nil
|
116
|
+
min_val = range_attr.get_attribute('min')
|
117
|
+
variable.min = min_val.value.strip unless min_val == nil
|
118
|
+
end
|
99
119
|
end
|
100
120
|
end
|
101
121
|
q = var.find('./qstn')
|
data/lib/models/version.rb
CHANGED
metadata
CHANGED
@@ -1,34 +1,49 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddi-parser
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Ian Dunlop
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
|
18
|
+
date: 2012-06-14 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
15
22
|
name: libxml-ruby
|
16
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
17
25
|
none: false
|
18
|
-
requirements:
|
19
|
-
- -
|
20
|
-
- !ruby/object:Gem::Version
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 7
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 2
|
33
|
+
- 0
|
21
34
|
version: 2.2.0
|
22
35
|
type: :runtime
|
23
|
-
|
24
|
-
version_requirements: *12850280
|
36
|
+
version_requirements: *id001
|
25
37
|
description: This gem parses ddi metadata files
|
26
|
-
email:
|
38
|
+
email:
|
27
39
|
- ian.dunlop@manchester.ac.uk
|
28
40
|
executables: []
|
41
|
+
|
29
42
|
extensions: []
|
43
|
+
|
30
44
|
extra_rdoc_files: []
|
31
|
-
|
45
|
+
|
46
|
+
files:
|
32
47
|
- lib/ddi-parser.rb
|
33
48
|
- lib/models/version.rb
|
34
49
|
- lib/models/summary_stat.rb
|
@@ -38,28 +53,39 @@ files:
|
|
38
53
|
- lib/models/category_statistic.rb
|
39
54
|
- lib/models/ddi_variable.rb
|
40
55
|
- lib/models/study_date.rb
|
56
|
+
has_rdoc: true
|
41
57
|
homepage: http://github.com/mygrid/ddi-parser
|
42
58
|
licenses: []
|
59
|
+
|
43
60
|
post_install_message:
|
44
61
|
rdoc_options: []
|
45
|
-
|
62
|
+
|
63
|
+
require_paths:
|
46
64
|
- lib
|
47
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
66
|
none: false
|
49
|
-
requirements:
|
50
|
-
- -
|
51
|
-
- !ruby/object:Gem::Version
|
52
|
-
|
53
|
-
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
75
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
hash: 3
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
59
83
|
requirements: []
|
84
|
+
|
60
85
|
rubyforge_project: ddi-parser
|
61
|
-
rubygems_version: 1.
|
86
|
+
rubygems_version: 1.6.2
|
62
87
|
signing_key:
|
63
88
|
specification_version: 3
|
64
89
|
summary: API for parsing ddi metadata files and returning results
|
65
90
|
test_files: []
|
91
|
+
|