ddi-parser 0.0.5 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/ddi-parser.rb +34 -14
  2. data/lib/models/version.rb +1 -1
  3. metadata +53 -27
@@ -8,19 +8,30 @@ require 'models/study'
8
8
  require 'models/study_date'
9
9
  require 'models/ddi_variable'
10
10
  require 'models/summary_stat'
11
-
11
+ require 'logger'
12
12
  module DDI
13
13
  class Parser
14
14
 
15
+ attr_accessor :logger
16
+
17
+ def initialize
18
+ @logger = Logger.new('ddi-parser.log')
19
+ end
20
+
15
21
  #Given a DDI metadata file, parse it and return study information
16
22
  #
17
23
  #Returns a DDI::Study object
18
24
  def parse ddi_file
25
+ @logger.info 'Parsing DDI file ' + ddi_file
19
26
  catalog = DDI::Catalog.new
20
27
  study = DDI::Study.new
21
28
  study_info_hash = Hash.new
22
29
  #TODO This will not work on Windows since it depends on the Unix `file` tool; a different approach is needed. Possibly use rchardet instead
23
- encode_type = `file --mime -br #{ddi_file}`.gsub(/\n/,"").split(';')[1].split('=')[1]
30
+ begin
31
+ encode_type = `file --mime -br #{ddi_file}`.gsub(/\n/,"").split(';')[1].split('=')[1]
32
+ rescue Exception => e
33
+
34
+ end
24
35
  #have to convert to UTF-8 for libxml
25
36
  contents = File.open(ddi_file).read
26
37
  output = Iconv.conv("UTF-8", encode_type, contents)
@@ -33,13 +44,11 @@ module DDI
33
44
  studynodes = doc.find('//stdyDscr')
34
45
  abstracts = studynodes[0].find('//abstract')
35
46
  abstract = ""
36
- abstracts.each do |ab|
37
- abstract << ab.content.strip
38
- end
47
+ abstracts.each {|ab| abstract << ab.content.strip}
39
48
  abstract.strip!
40
49
  study.abstract = abstract
41
- study.title = studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0].first.content.strip
42
- study.id = studynodes[0].find('//IDNo')[0].first.content.strip
50
+ study.title = studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0].first.content.strip unless studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0] == nil
51
+ study.id = studynodes[0].find('//IDNo')[0].first.content.strip unless studynodes[0].find('//IDNo')[0] == nil
43
52
 
44
53
  #start and finish dates for study
45
54
  dates = []
@@ -54,7 +63,10 @@ module DDI
54
63
  end
55
64
  end
56
65
  study.dates = dates
57
- study.sampling_procedure = studynodes[0].find('//sampProc')[0].first.content.strip unless studynodes[0].find('//sampProc')[0].children.size == 0
66
+ studynodes[0].find('//sampProc')[0] ? samp_node = studynodes[0].find('//sampProc')[0] : samp_node = nil
67
+ unless samp_node == nil
68
+ study.sampling_procedure = samp_node.first.content.strip unless samp_node.first == nil
69
+ end
58
70
  # study.weight = studynodes[0].find('//sampProc')[0].first.content
59
71
  study.ddi_variables = get_variable_information doc
60
72
  return study
@@ -74,7 +86,7 @@ module DDI
74
86
  groups = a.get_attribute('var')
75
87
  if groups != nil
76
88
  groups = a.get_attribute('var')
77
- variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ')
89
+ variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ') unless vargroup.find('./labl')[0] == nil
78
90
  # else
79
91
  # variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ')
80
92
  end
@@ -85,17 +97,25 @@ module DDI
85
97
  var_attr = var.attributes
86
98
  variable.id = var_attr.get_attribute('ID').value.strip unless var_attr.get_attribute('ID') == nil
87
99
  variable.name = var_attr.get_attribute('name').value.strip unless var_attr.get_attribute('name') == nil
100
+ #if there is no name then it has no meaning or context
101
+ next if variable.name == nil
88
102
  variable.file = var_attr.get_attribute('files').value.strip unless var_attr.get_attribute('files') == nil
89
103
  variable.interval = var_attr.get_attribute('intrvl').value.strip unless var_attr.get_attribute('intrvl') == nil
90
104
  variable.label = var.find('./labl')[0].content.strip unless var.find('./labl')[0] == nil
105
+ #these things never seem consistent with the schema, might be an inner element, might be an attribute!
106
+ if var.find('./labl')[0] == nil
107
+ variable.label = var_attr.get_attribute('labl').value.strip unless var_attr.get_attribute('labl') == nil
108
+ end
91
109
  rng = var.find('./valrng')
92
110
  if rng != nil
93
111
  if rng[0] != nil
94
- range_attr = rng[0].first.attributes
95
- max_val = range_attr.get_attribute('max')
96
- variable.max = max_val.value.strip unless max_val == nil
97
- min_val = range_attr.get_attribute('min')
98
- variable.min = min_val.value.strip unless min_val == nil
112
+ unless rng[0].first == nil
113
+ range_attr = rng[0].first.attributes
114
+ max_val = range_attr.get_attribute('max')
115
+ variable.max = max_val.value.strip unless max_val == nil
116
+ min_val = range_attr.get_attribute('min')
117
+ variable.min = min_val.value.strip unless min_val == nil
118
+ end
99
119
  end
100
120
  end
101
121
  q = var.find('./qstn')
@@ -1,5 +1,5 @@
1
1
  module DDI
2
2
  module Parser
3
- VERSION = "0.0.5"
3
+ VERSION = "0.1.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,34 +1,49 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: ddi-parser
3
- version: !ruby/object:Gem::Version
4
- version: 0.0.5
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
5
  prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
6
11
  platform: ruby
7
- authors:
12
+ authors:
8
13
  - Ian Dunlop
9
14
  autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
- date: 2012-04-16 00:00:00.000000000 Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
17
+
18
+ date: 2012-06-14 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
15
22
  name: libxml-ruby
16
- requirement: &12850280 !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
17
25
  none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 2
32
+ - 2
33
+ - 0
21
34
  version: 2.2.0
22
35
  type: :runtime
23
- prerelease: false
24
- version_requirements: *12850280
36
+ version_requirements: *id001
25
37
  description: This gem parses ddi metadata files
26
- email:
38
+ email:
27
39
  - ian.dunlop@manchester.ac.uk
28
40
  executables: []
41
+
29
42
  extensions: []
43
+
30
44
  extra_rdoc_files: []
31
- files:
45
+
46
+ files:
32
47
  - lib/ddi-parser.rb
33
48
  - lib/models/version.rb
34
49
  - lib/models/summary_stat.rb
@@ -38,28 +53,39 @@ files:
38
53
  - lib/models/category_statistic.rb
39
54
  - lib/models/ddi_variable.rb
40
55
  - lib/models/study_date.rb
56
+ has_rdoc: true
41
57
  homepage: http://github.com/mygrid/ddi-parser
42
58
  licenses: []
59
+
43
60
  post_install_message:
44
61
  rdoc_options: []
45
- require_paths:
62
+
63
+ require_paths:
46
64
  - lib
47
- required_ruby_version: !ruby/object:Gem::Requirement
65
+ required_ruby_version: !ruby/object:Gem::Requirement
48
66
  none: false
49
- requirements:
50
- - - ! '>='
51
- - !ruby/object:Gem::Version
52
- version: '0'
53
- required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
75
  none: false
55
- requirements:
56
- - - ! '>='
57
- - !ruby/object:Gem::Version
58
- version: '0'
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ hash: 3
80
+ segments:
81
+ - 0
82
+ version: "0"
59
83
  requirements: []
84
+
60
85
  rubyforge_project: ddi-parser
61
- rubygems_version: 1.8.11
86
+ rubygems_version: 1.6.2
62
87
  signing_key:
63
88
  specification_version: 3
64
89
  summary: API for parsing ddi metadata files and returning results
65
90
  test_files: []
91
+