ddi-parser 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/ddi-parser.rb +34 -14
- data/lib/models/version.rb +1 -1
- metadata +53 -27
data/lib/ddi-parser.rb
CHANGED
@@ -8,19 +8,30 @@ require 'models/study'
|
|
8
8
|
require 'models/study_date'
|
9
9
|
require 'models/ddi_variable'
|
10
10
|
require 'models/summary_stat'
|
11
|
-
|
11
|
+
require 'logger'
|
12
12
|
module DDI
|
13
13
|
class Parser
|
14
14
|
|
15
|
+
attr_accessor :logger
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@logger = Logger.new('ddi-parser.log')
|
19
|
+
end
|
20
|
+
|
15
21
|
#Given a DDI metadata file, parse it and return study information
|
16
22
|
#
|
17
23
|
#Returns a DDI::Study object
|
18
24
|
def parse ddi_file
|
25
|
+
@logger.info 'Parsing DDI file ' + ddi_file
|
19
26
|
catalog = DDI::Catalog.new
|
20
27
|
study = DDI::Study.new
|
21
28
|
study_info_hash = Hash.new
|
22
29
|
#TODO This will not work on Windows because it depends on the Unix tool 'file'; a different way of detecting the encoding is needed. Possibly use rchardet instead
|
23
|
-
|
30
|
+
begin
|
31
|
+
encode_type = `file --mime -br #{ddi_file}`.gsub(/\n/,"").split(';')[1].split('=')[1]
|
32
|
+
rescue Exception => e
|
33
|
+
|
34
|
+
end
|
24
35
|
#have to convert to UTF-8 for libxml
|
25
36
|
contents = File.open(ddi_file).read
|
26
37
|
output = Iconv.conv("UTF-8", encode_type, contents)
|
@@ -33,13 +44,11 @@ module DDI
|
|
33
44
|
studynodes = doc.find('//stdyDscr')
|
34
45
|
abstracts = studynodes[0].find('//abstract')
|
35
46
|
abstract = ""
|
36
|
-
abstracts.each
|
37
|
-
abstract << ab.content.strip
|
38
|
-
end
|
47
|
+
abstracts.each {|ab| abstract << ab.content.strip}
|
39
48
|
abstract.strip!
|
40
49
|
study.abstract = abstract
|
41
|
-
study.title = studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0].first.content.strip
|
42
|
-
study.id = studynodes[0].find('//IDNo')[0].first.content.strip
|
50
|
+
study.title = studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0].first.content.strip unless studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0] == nil
|
51
|
+
study.id = studynodes[0].find('//IDNo')[0].first.content.strip unless studynodes[0].find('//IDNo')[0] == nil
|
43
52
|
|
44
53
|
#start and finish dates for study
|
45
54
|
dates = []
|
@@ -54,7 +63,10 @@ module DDI
|
|
54
63
|
end
|
55
64
|
end
|
56
65
|
study.dates = dates
|
57
|
-
|
66
|
+
studynodes[0].find('//sampProc')[0] ? samp_node = studynodes[0].find('//sampProc')[0] : samp_node = nil
|
67
|
+
unless samp_node == nil
|
68
|
+
study.sampling_procedure = samp_node.first.content.strip unless samp_node.first == nil
|
69
|
+
end
|
58
70
|
# study.weight = studynodes[0].find('//sampProc')[0].first.content
|
59
71
|
study.ddi_variables = get_variable_information doc
|
60
72
|
return study
|
@@ -74,7 +86,7 @@ module DDI
|
|
74
86
|
groups = a.get_attribute('var')
|
75
87
|
if groups != nil
|
76
88
|
groups = a.get_attribute('var')
|
77
|
-
variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ')
|
89
|
+
variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ') unless vargroup.find('./labl')[0] == nil
|
78
90
|
# else
|
79
91
|
# variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ')
|
80
92
|
end
|
@@ -85,17 +97,25 @@ module DDI
|
|
85
97
|
var_attr = var.attributes
|
86
98
|
variable.id = var_attr.get_attribute('ID').value.strip unless var_attr.get_attribute('ID') == nil
|
87
99
|
variable.name = var_attr.get_attribute('name').value.strip unless var_attr.get_attribute('name') == nil
|
100
|
+
#a variable without a name has no meaning or context, so skip it
|
101
|
+
next if variable.name == nil
|
88
102
|
variable.file = var_attr.get_attribute('files').value.strip unless var_attr.get_attribute('files') == nil
|
89
103
|
variable.interval = var_attr.get_attribute('intrvl').value.strip unless var_attr.get_attribute('intrvl') == nil
|
90
104
|
variable.label = var.find('./labl')[0].content.strip unless var.find('./labl')[0] == nil
|
105
|
+
#labels never seem consistent with the schema: the label might be an inner element or it might be an attribute!
|
106
|
+
if var.find('./labl')[0] == nil
|
107
|
+
variable.label = var_attr.get_attribute('labl').value.strip unless var_attr.get_attribute('labl') == nil
|
108
|
+
end
|
91
109
|
rng = var.find('./valrng')
|
92
110
|
if rng != nil
|
93
111
|
if rng[0] != nil
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
112
|
+
unless rng[0].first == nil
|
113
|
+
range_attr = rng[0].first.attributes
|
114
|
+
max_val = range_attr.get_attribute('max')
|
115
|
+
variable.max = max_val.value.strip unless max_val == nil
|
116
|
+
min_val = range_attr.get_attribute('min')
|
117
|
+
variable.min = min_val.value.strip unless min_val == nil
|
118
|
+
end
|
99
119
|
end
|
100
120
|
end
|
101
121
|
q = var.find('./qstn')
|
data/lib/models/version.rb
CHANGED
metadata
CHANGED
@@ -1,34 +1,49 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddi-parser
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Ian Dunlop
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
17
|
+
|
18
|
+
date: 2012-06-14 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
15
22
|
name: libxml-ruby
|
16
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
17
25
|
none: false
|
18
|
-
requirements:
|
19
|
-
- -
|
20
|
-
- !ruby/object:Gem::Version
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 7
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 2
|
33
|
+
- 0
|
21
34
|
version: 2.2.0
|
22
35
|
type: :runtime
|
23
|
-
|
24
|
-
version_requirements: *12850280
|
36
|
+
version_requirements: *id001
|
25
37
|
description: This gem parses ddi metadata files
|
26
|
-
email:
|
38
|
+
email:
|
27
39
|
- ian.dunlop@manchester.ac.uk
|
28
40
|
executables: []
|
41
|
+
|
29
42
|
extensions: []
|
43
|
+
|
30
44
|
extra_rdoc_files: []
|
31
|
-
|
45
|
+
|
46
|
+
files:
|
32
47
|
- lib/ddi-parser.rb
|
33
48
|
- lib/models/version.rb
|
34
49
|
- lib/models/summary_stat.rb
|
@@ -38,28 +53,39 @@ files:
|
|
38
53
|
- lib/models/category_statistic.rb
|
39
54
|
- lib/models/ddi_variable.rb
|
40
55
|
- lib/models/study_date.rb
|
56
|
+
has_rdoc: true
|
41
57
|
homepage: http://github.com/mygrid/ddi-parser
|
42
58
|
licenses: []
|
59
|
+
|
43
60
|
post_install_message:
|
44
61
|
rdoc_options: []
|
45
|
-
|
62
|
+
|
63
|
+
require_paths:
|
46
64
|
- lib
|
47
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
66
|
none: false
|
49
|
-
requirements:
|
50
|
-
- -
|
51
|
-
- !ruby/object:Gem::Version
|
52
|
-
|
53
|
-
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
75
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
hash: 3
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
59
83
|
requirements: []
|
84
|
+
|
60
85
|
rubyforge_project: ddi-parser
|
61
|
-
rubygems_version: 1.
|
86
|
+
rubygems_version: 1.6.2
|
62
87
|
signing_key:
|
63
88
|
specification_version: 3
|
64
89
|
summary: API for parsing ddi metadata files and returning results
|
65
90
|
test_files: []
|
91
|
+
|