ddi-parser 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/ddi-parser.rb +34 -14
  2. data/lib/models/version.rb +1 -1
  3. metadata +53 -27
@@ -8,19 +8,30 @@ require 'models/study'
8
8
  require 'models/study_date'
9
9
  require 'models/ddi_variable'
10
10
  require 'models/summary_stat'
11
-
11
+ require 'logger'
12
12
  module DDI
13
13
  class Parser
14
14
 
15
+ attr_accessor :logger
16
+
17
+ def initialize
18
+ @logger = Logger.new('ddi-parser.log')
19
+ end
20
+
15
21
  #Given a DDI metadata file, parse it and return study information
16
22
  #
17
23
  #Returns a Nesstar::Study object
18
24
  def parse ddi_file
25
+ @logger.info 'Parsing DDI file ' + ddi_file
19
26
  catalog = DDI::Catalog.new
20
27
  study = DDI::Study.new
21
28
  study_info_hash = Hash.new
22
29
  #TODO This will not work on windows since it depends on the unix tool file need to use a different way. Possibly use rchardet instead
23
- encode_type = `file --mime -br #{ddi_file}`.gsub(/\n/,"").split(';')[1].split('=')[1]
30
+ begin
31
+ encode_type = `file --mime -br #{ddi_file}`.gsub(/\n/,"").split(';')[1].split('=')[1]
32
+ rescue Exception => e
33
+
34
+ end
24
35
  #have to convert to UTF-8 for libxml
25
36
  contents = File.open(ddi_file).read
26
37
  output = Iconv.conv("UTF-8", encode_type, contents)
@@ -33,13 +44,11 @@ module DDI
33
44
  studynodes = doc.find('//stdyDscr')
34
45
  abstracts = studynodes[0].find('//abstract')
35
46
  abstract = ""
36
- abstracts.each do |ab|
37
- abstract << ab.content.strip
38
- end
47
+ abstracts.each {|ab| abstract << ab.content.strip}
39
48
  abstract.strip!
40
49
  study.abstract = abstract
41
- study.title = studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0].first.content.strip
42
- study.id = studynodes[0].find('//IDNo')[0].first.content.strip
50
+ study.title = studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0].first.content.strip unless studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0] == nil
51
+ study.id = studynodes[0].find('//IDNo')[0].first.content.strip unless studynodes[0].find('//IDNo')[0] == nil
43
52
 
44
53
  #start and finish dates for study
45
54
  dates = []
@@ -54,7 +63,10 @@ module DDI
54
63
  end
55
64
  end
56
65
  study.dates = dates
57
- study.sampling_procedure = studynodes[0].find('//sampProc')[0].first.content.strip unless studynodes[0].find('//sampProc')[0].children.size == 0
66
+ studynodes[0].find('//sampProc')[0] ? samp_node = studynodes[0].find('//sampProc')[0] : samp_node = nil
67
+ unless samp_node == nil
68
+ study.sampling_procedure = samp_node.first.content.strip unless samp_node.first == nil
69
+ end
58
70
  # study.weight = studynodes[0].find('//sampProc')[0].first.content
59
71
  study.ddi_variables = get_variable_information doc
60
72
  return study
@@ -74,7 +86,7 @@ module DDI
74
86
  groups = a.get_attribute('var')
75
87
  if groups != nil
76
88
  groups = a.get_attribute('var')
77
- variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ')
89
+ variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ') unless vargroup.find('./labl')[0] == nil
78
90
  # else
79
91
  # variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ')
80
92
  end
@@ -85,17 +97,25 @@ module DDI
85
97
  var_attr = var.attributes
86
98
  variable.id = var_attr.get_attribute('ID').value.strip unless var_attr.get_attribute('ID') == nil
87
99
  variable.name = var_attr.get_attribute('name').value.strip unless var_attr.get_attribute('name') == nil
100
+ #if there is no name then it has no meaning or context
101
+ next if variable.name == nil
88
102
  variable.file = var_attr.get_attribute('files').value.strip unless var_attr.get_attribute('files') == nil
89
103
  variable.interval = var_attr.get_attribute('intrvl').value.strip unless var_attr.get_attribute('intrvl') == nil
90
104
  variable.label = var.find('./labl')[0].content.strip unless var.find('./labl')[0] == nil
105
+ #these things never seem consistent with the schema, might be an inner element, might be an attribute!
106
+ if var.find('./labl')[0] == nil
107
+ variable.label = var_attr.get_attribute('labl').value.strip unless var_attr.get_attribute('labl') == nil
108
+ end
91
109
  rng = var.find('./valrng')
92
110
  if rng != nil
93
111
  if rng[0] != nil
94
- range_attr = rng[0].first.attributes
95
- max_val = range_attr.get_attribute('max')
96
- variable.max = max_val.value.strip unless max_val == nil
97
- min_val = range_attr.get_attribute('min')
98
- variable.min = min_val.value.strip unless min_val == nil
112
+ unless rng[0].first == nil
113
+ range_attr = rng[0].first.attributes
114
+ max_val = range_attr.get_attribute('max')
115
+ variable.max = max_val.value.strip unless max_val == nil
116
+ min_val = range_attr.get_attribute('min')
117
+ variable.min = min_val.value.strip unless min_val == nil
118
+ end
99
119
  end
100
120
  end
101
121
  q = var.find('./qstn')
@@ -1,5 +1,5 @@
1
1
  module DDI
2
2
  module Parser
3
- VERSION = "0.0.5"
3
+ VERSION = "0.1.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,34 +1,49 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: ddi-parser
3
- version: !ruby/object:Gem::Version
4
- version: 0.0.5
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
5
  prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
6
11
  platform: ruby
7
- authors:
12
+ authors:
8
13
  - Ian Dunlop
9
14
  autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
- date: 2012-04-16 00:00:00.000000000 Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
17
+
18
+ date: 2012-06-14 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
15
22
  name: libxml-ruby
16
- requirement: &12850280 !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
17
25
  none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 2
32
+ - 2
33
+ - 0
21
34
  version: 2.2.0
22
35
  type: :runtime
23
- prerelease: false
24
- version_requirements: *12850280
36
+ version_requirements: *id001
25
37
  description: This gem parses ddi metadata files
26
- email:
38
+ email:
27
39
  - ian.dunlop@manchester.ac.uk
28
40
  executables: []
41
+
29
42
  extensions: []
43
+
30
44
  extra_rdoc_files: []
31
- files:
45
+
46
+ files:
32
47
  - lib/ddi-parser.rb
33
48
  - lib/models/version.rb
34
49
  - lib/models/summary_stat.rb
@@ -38,28 +53,39 @@ files:
38
53
  - lib/models/category_statistic.rb
39
54
  - lib/models/ddi_variable.rb
40
55
  - lib/models/study_date.rb
56
+ has_rdoc: true
41
57
  homepage: http://github.com/mygrid/ddi-parser
42
58
  licenses: []
59
+
43
60
  post_install_message:
44
61
  rdoc_options: []
45
- require_paths:
62
+
63
+ require_paths:
46
64
  - lib
47
- required_ruby_version: !ruby/object:Gem::Requirement
65
+ required_ruby_version: !ruby/object:Gem::Requirement
48
66
  none: false
49
- requirements:
50
- - - ! '>='
51
- - !ruby/object:Gem::Version
52
- version: '0'
53
- required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ hash: 3
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
75
  none: false
55
- requirements:
56
- - - ! '>='
57
- - !ruby/object:Gem::Version
58
- version: '0'
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ hash: 3
80
+ segments:
81
+ - 0
82
+ version: "0"
59
83
  requirements: []
84
+
60
85
  rubyforge_project: ddi-parser
61
- rubygems_version: 1.8.11
86
+ rubygems_version: 1.6.2
62
87
  signing_key:
63
88
  specification_version: 3
64
89
  summary: API for parsing ddi metadata files and returning results
65
90
  test_files: []
91
+