ddi-parser 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ddi-parser.rb CHANGED
@@ -5,7 +5,7 @@ require 'models/category'
5
5
  require 'models/category_statistic'
6
6
  require 'models/study'
7
7
  require 'models/study_date'
8
- require 'models/variable'
8
+ require 'models/ddi_variable'
9
9
  require 'models/summary_stat'
10
10
 
11
11
  module DDI
@@ -18,7 +18,15 @@ module DDI
18
18
  catalog = DDI::Catalog.new
19
19
  study = DDI::Study.new
20
20
  study_info_hash = Hash.new
21
- parser = LibXML::XML::Parser.file(ddi_file)
21
+ encode_type = `file --mime -br #{ddi_file}`.gsub(/\n/,"").split(';')[1].split('=')[1]
22
+ #have to convert to UTF-8 for libxml
23
+ contents = File.open(ddi_file).read
24
+ output = Iconv.conv("UTF-8", encode_type, contents)
25
+ converted_file = File.join(File.dirname(ddi_file), "converted_file.xml")
26
+ file = File.open(converted_file, 'w')
27
+ file.write(output)
28
+ file.close
29
+ parser = LibXML::XML::Parser.file(converted_file)
22
30
  doc = parser.parse
23
31
  studynodes = doc.find('//stdyDscr')
24
32
  abstracts = studynodes[0].find('//abstract')
@@ -36,15 +44,17 @@ module DDI
36
44
  date = studynodes[0].find('//sumDscr/collDate')
37
45
  date.each do |d|
38
46
  a = d.attributes
39
- study_date = DDI::StudyDate.new
40
- study_date.type = a.get_attribute('event').value.strip
41
- study_date.date = a.get_attribute('date').value.strip
42
- dates.push(study_date)
47
+ unless a.length == 0
48
+ study_date = DDI::StudyDate.new
49
+ study_date.type = a.get_attribute('event').value.strip
50
+ study_date.date = a.get_attribute('date').value.strip
51
+ dates.push(study_date)
52
+ end
43
53
  end
44
54
  study.dates = dates
45
- study.sampling_procedure = studynodes[0].find('//sampProc')[0].first.content.strip unless studynodes[0].find('//sampProc')[0] == nil
55
+ study.sampling_procedure = studynodes[0].find('//sampProc')[0].first.content.strip unless studynodes[0].find('//sampProc')[0].children.size == 0
46
56
  # study.weight = studynodes[0].find('//sampProc')[0].first.content
47
- study.variables = get_variable_information doc
57
+ study.ddi_variables = get_variable_information doc
48
58
  return study
49
59
  end
50
60
 
@@ -69,7 +79,7 @@ module DDI
69
79
  end
70
80
  vars = docnodes[0].find('//dataDscr/var')
71
81
  vars.each do |var|
72
- variable = DDI::Variable.new
82
+ variable = DDI::DDIVariable.new
73
83
  var_attr = var.attributes
74
84
  variable.id = var_attr.get_attribute('ID').value.strip unless var_attr.get_attribute('ID') == nil
75
85
  variable.name = var_attr.get_attribute('name').value.strip unless var_attr.get_attribute('name') == nil
@@ -158,4 +168,4 @@ module DDI
158
168
  end
159
169
 
160
170
  end
161
- end
171
+ end
@@ -1,11 +1,11 @@
1
1
  module DDI
2
2
 
3
3
  #Information about a variable/column in a dataset
4
- class Variable
4
+ class DDIVariable
5
5
 
6
6
  attr_reader :name, :label, :group, :id, :file, :interval, :max, :min, :question, :interview_instruction, :summary_stats, :categories
7
7
  attr_writer :name, :label, :group, :id, :file, :interval, :max, :min, :question, :interview_instruction, :summary_stats, :categories
8
8
 
9
9
  end
10
10
 
11
- end
11
+ end
data/lib/models/study.rb CHANGED
@@ -3,9 +3,9 @@ module DDI
3
3
  #Contains a set of variables and belongs to a catalog
4
4
  class Study
5
5
 
6
- attr_reader :variables, :abstract, :title, :id, :dates, :sampling_procedure, :weight, :nesstar_id, :nesstar_uri
7
- attr_writer :variables, :abstract, :title, :id, :dates, :sampling_procedure, :weight, :nesstar_id, :nesstar_uri
6
+ attr_reader :ddi_variables, :abstract, :title, :id, :dates, :sampling_procedure, :weight, :nesstar_id, :nesstar_uri
7
+ attr_writer :ddi_variables, :abstract, :title, :id, :dates, :sampling_procedure, :weight, :nesstar_id, :nesstar_uri
8
8
 
9
9
  end
10
10
 
11
- end
11
+ end
@@ -1,5 +1,5 @@
1
1
  module DDI
2
2
  module Parser
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ddi-parser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
5
- prerelease: false
4
+ hash: 27
5
+ prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Ian Dunlop
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-05-06 00:00:00 +01:00
18
+ date: 2011-09-26 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -45,14 +45,14 @@ extra_rdoc_files: []
45
45
 
46
46
  files:
47
47
  - lib/ddi-parser.rb
48
+ - lib/models/version.rb
49
+ - lib/models/summary_stat.rb
50
+ - lib/models/study.rb
48
51
  - lib/models/catalog.rb
49
52
  - lib/models/category.rb
50
53
  - lib/models/category_statistic.rb
51
- - lib/models/study.rb
54
+ - lib/models/ddi_variable.rb
52
55
  - lib/models/study_date.rb
53
- - lib/models/summary_stat.rb
54
- - lib/models/variable.rb
55
- - lib/models/version.rb
56
56
  has_rdoc: true
57
57
  homepage: http://github.com/mygrid/ddi-parser
58
58
  licenses: []
@@ -83,7 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  requirements: []
84
84
 
85
85
  rubyforge_project: ddi-parser
86
- rubygems_version: 1.3.7
86
+ rubygems_version: 1.6.2
87
87
  signing_key:
88
88
  specification_version: 3
89
89
  summary: API for parsing ddi metadata files and returning results