nesstar-api 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
module Nesstar
  # Plain value holder for a catalog: the studies it contains plus its
  # label and description. All attributes start out nil and are readable
  # and writable.
  class Catalog
    attr_accessor :studies, :label, :description
  end
end
@@ -0,0 +1,11 @@
1
module Nesstar
  # Value information for the rows in a variable: the category value, its
  # label and the statistics attached to it. All attributes are readable
  # and writable.
  class Category
    attr_accessor :value, :label, :category_statistics
  end
end
@@ -0,0 +1,11 @@
1
module Nesstar
  # Stats about a category belonging to a variable: a statistic type and
  # its value. Both attributes are readable and writable.
  class CategoryStatistic
    attr_accessor :type, :value
  end
end
@@ -0,0 +1,11 @@
1
module Nesstar
  # Contains a set of variables and belongs to a catalog. Every attribute
  # is a plain readable/writable slot that starts out nil.
  class Study
    attr_accessor :variables, :abstract, :title, :id, :dates,
                  :sampling_procedure, :weight
  end
end
@@ -0,0 +1,11 @@
1
module Nesstar
  # Dates that are important to the study, e.g. start date or end date.
  # Holds the kind of date (type) and the date itself; both are readable
  # and writable.
  class StudyDate
    attr_accessor :type, :date
  end
end
@@ -0,0 +1,10 @@
1
module Nesstar
  # A single summary statistic: its type and its value. Both attributes
  # are readable and writable.
  class SummaryStat
    attr_accessor :type, :value
  end
end
@@ -0,0 +1,11 @@
1
module Nesstar
  # Information about a variable/column in a dataset. Every attribute is
  # a plain readable/writable slot that starts out nil.
  class Variable
    attr_accessor :name, :label, :group, :id, :file, :interval,
                  :max, :min, :question, :interview_instruction,
                  :summary_stats, :categories
  end
end
@@ -0,0 +1,5 @@
1
module Nesstar
  module Api
    # Version of the nesstar-api gem. Frozen so the shared constant cannot
    # be mutated in place by callers.
    VERSION = "0.0.1".freeze
  end
end
@@ -0,0 +1,347 @@
1
+ require 'rubygems'
2
+ require 'zlib'
3
+ require 'net/http'
4
+ require 'stringio'
5
+ require 'cgi'
6
+ require 'libxml'
7
+ require 'nokogiri'
8
+ require 'tree'
9
+ require 'nesstar-api/catalog'
10
+ require 'nesstar-api/category'
11
+ require 'nesstar-api/category_statistic'
12
+ require 'nesstar-api/study'
13
+ require 'nesstar-api/study_date'
14
+ require 'nesstar-api/variable'
15
+ require 'nesstar-api/summary_stat'
16
+
17
+ module Nesstar
18
+ module Api
19
+
20
+ class CatalogApi
21
+
22
+ #given tree of datasets and catalogs figure out which are the
23
+ #survey types, which are surveys and link them to child datasets
24
+ def parse_surveys_from_nodes node, surveys_hash, survey_types_hash
25
+ node.children.each do |node|
26
+ if node.name.index('fStudy')
27
+ #this is a dataset, find its survey and survey_type
28
+ survey = node.parent
29
+ if surveys_hash.has_key?(survey.name)
30
+ surveys_hash[survey.name].push(node.name)
31
+ else
32
+ surveys_hash[survey.name] = []
33
+ surveys_hash[survey.name].push(node.name)
34
+ end
35
+ survey_type = survey.parent
36
+ if survey_type
37
+ if survey_types_hash.has_key?(survey_type.name)
38
+ if !survey_types_hash[survey_type.name].include?(survey.name)
39
+ survey_types_hash[survey_type.name].push(survey.name)
40
+ end
41
+ else
42
+ survey_types_hash[survey_type.name] = []
43
+ survey_types_hash[survey_type.name].push(survey.name)
44
+ end
45
+ else
46
+ if survey_types_hash.has_key?('none')
47
+ if !survey_types_hash['none'].include?(survey.name)
48
+ survey_types_hash['none'].push(survey.name)
49
+ end
50
+ else
51
+ survey_types_hash['none'] = []
52
+ survey_types_hash['none'].push(survey.name)
53
+ end
54
+ end
55
+ else
56
+ #its a catalog so keep going downwards
57
+ parse_surveys_from_nodes node, surveys_hash, survey_types_hash
58
+ end
59
+ end
60
+ end
61
+
62
+ def get_catalog url, catalog
63
+
64
+ #Hash of catalogs to their child datasets
65
+ catalog_hash = Hash.new
66
+
67
+ uri = URI.parse(url)
68
+ query_string = '/browser/browser?action=LIST&path=ROOT' + CGI.escape('|Properties|children') + "&url=" + CGI.escape('http://') + CGI.escape(uri.host + ':' + uri.port.to_s) + CGI.escape("/obj/fCatalog/" + catalog + "@children")
69
+ full_uri = uri.merge query_string
70
+ res = Net::HTTP.get full_uri
71
+ doc = Nokogiri::HTML(res)
72
+
73
+ parse_out_datasets doc, uri.host, catalog, catalog_hash
74
+
75
+ return catalog_hash
76
+ end
77
+
78
+ def get_nodes url, catalog
79
+ #tree of catalogs to their child datasets
80
+ root_tree_node = Tree::TreeNode.new(catalog, "Catalog Content")
81
+
82
+ uri = URI.parse(url)
83
+ query_string = '/browser/browser?action=LIST&path=ROOT' + CGI.escape('|Properties|children') + "&url=" + CGI.escape('http://') + CGI.escape(uri.host + ':' + uri.port.to_s) + CGI.escape("/obj/fCatalog/" + catalog + "@children")
84
+ full_uri = uri.merge query_string
85
+ res = Net::HTTP.get full_uri
86
+ doc = Nokogiri::HTML(res)
87
+
88
+ dataset_tree doc, uri.host, catalog, root_tree_node
89
+
90
+ return root_tree_node
91
+ end
92
+
93
+ #get the ddi xml for a nesstar dataset
94
+ def get_ddi uri, dataset
95
+ ddi_uri = URI.parse(uri)
96
+ ddi_uri.merge!("/obj/fStudy/" + dataset)
97
+ ddi_uri.merge!('?http://www.nesstar.org/rdf/method=http://www.nesstar.org/rdf/Dataset/GetDDI')
98
+ res = Net::HTTP.get(ddi_uri)
99
+ gz = Zlib::GzipReader.new(StringIO.new(res))
100
+ xml = gz.read
101
+ return xml
102
+ end
103
+
104
+ #return a catalog object with information inside it
105
+ #uri is something like http://nesstar.here.com
106
+ #catalog is the name of the catalog eg Catalog20
107
+ def get_catalog_information uri, catalog
108
+ catalog_uri = URI.parse(uri)
109
+ catalog_uri.merge!("/obj/fCatalog/" + catalog)
110
+ catalog_res = Net::HTTP.get(catalog_uri)
111
+ gz = Zlib::GzipReader.new(StringIO.new(catalog_res))
112
+ catalog_info = gz.read
113
+ doc = Nokogiri::XML(catalog_info)
114
+ label = doc.xpath('//s:label')
115
+ description = doc.xpath('//s:comment')
116
+ catalog = Nesstar::Catalog.new
117
+ catalog.label = label[0].content.strip unless label[0] == nil
118
+ catalog.description = description[0].content.strip unless description[0] == nil
119
+ return catalog
120
+ end
121
+
122
+ #information about the dataset only
123
+ def get_simple_study_information uri, dataset
124
+ dataset_uri = URI.parse(uri)
125
+ dataset_uri.merge!("/obj/fStudy/" + dataset)
126
+ dataset_res = Net::HTTP.get(dataset_uri)
127
+ gz = Zlib::GzipReader.new(StringIO.new(dataset_res))
128
+ dataset_info = gz.read
129
+ doc = Nokogiri::XML(dataset_info)
130
+ label = doc.xpath('//s:label')
131
+ description = doc.xpath('//s:comment')
132
+ study = Nesstar::Study.new
133
+ study.title = label[0].content.strip unless label[0] == nil
134
+ study.abstract = description[0].content.strip unless description[0] == nil
135
+ return study
136
+ end
137
+
138
+ #information about the dataset and its variables
139
+ #inputs are the uri to a dataset file
140
+ def get_study_information uri, dataset
141
+ #TODO use the get_ddi method above
142
+ ddi_uri = URI.parse(uri)
143
+ ddi_uri.merge!("/obj/fStudy/" + dataset)
144
+ ddi_uri.merge!('?http://www.nesstar.org/rdf/method=http://www.nesstar.org/rdf/Dataset/GetDDI')
145
+ res = Net::HTTP.get(ddi_uri)
146
+ gz = Zlib::GzipReader.new(StringIO.new(res))
147
+ xml = gz.read
148
+ catalog = Nesstar::Catalog.new
149
+ study = Nesstar::Study.new
150
+ study_info_hash = Hash.new
151
+ parser = LibXML::XML::Parser.string(xml)
152
+ doc = parser.parse
153
+ studynodes = doc.find('//stdyDscr')
154
+ abstracts = studynodes[0].find('//abstract')
155
+ abstract = ""
156
+ abstracts.each do |ab|
157
+ abstract << ab.first.content.strip
158
+ end
159
+ study.abstract = abstract
160
+ study.title = studynodes[0].find('//stdyDscr/citation/titlStmt/titl')[0].first.content.strip
161
+ study.id = studynodes[0].find('//IDNo')[0].first.content.strip
162
+
163
+ #start and finish dates for study
164
+ dates = []
165
+ date = studynodes[0].find('//sumDscr/collDate')
166
+ date.each do |d|
167
+ a = d.attributes
168
+ study_date = Nesstar::StudyDate.new
169
+ study_date.type = a.get_attribute('event').value.strip
170
+ study_date.date = a.get_attribute('date').value.strip
171
+ dates.push(study_date)
172
+ end
173
+ study.dates = dates
174
+ study.sampling_procedure = studynodes[0].find('//sampProc')[0].first.content.strip unless studynodes[0].find('//sampProc')[0] == nil
175
+ # study.weight = studynodes[0].find('//sampProc')[0].first.content
176
+ study.variables = get_variable_information doc
177
+ return study
178
+ end
179
+
180
+ def get_children
181
+
182
+ end
183
+
184
+ def add_dataset
185
+
186
+ end
187
+
188
+ private
189
+
190
+ #pull out all the datasets for a catalog, recursively if need be
191
+ def dataset_tree doc, uri, catalog, tree_node
192
+ # if tree_node == nil
193
+ # tree_node = Tree::TreeNode.new(catalog, "Catalog Content")
194
+ # end
195
+ links = doc.xpath('//a')
196
+ links.each do |link|
197
+ if link.content.index(uri) != nil && link.content.index(catalog) == nil && link.content.index('fCatalog') == nil
198
+ #its a dataset
199
+ child_node = Tree::TreeNode.new(link.content, catalog + " child")
200
+ tree_node << child_node
201
+ elsif link.content.index('fCatalog') != nil && link.content.index(catalog) == nil
202
+ #its a new catalog
203
+ url = URI.parse(link.content)
204
+ new_catalog = url.path.split('/').last
205
+ catalog_doc = retrieve_html "http://" + url.host + ':' + url.port.to_s, new_catalog
206
+ child_node = Tree::TreeNode.new(link.content, catalog + " child")
207
+ tree_node << child_node
208
+ dataset_tree catalog_doc, url.host, new_catalog, child_node
209
+ end
210
+ end
211
+ return tree_node
212
+ end
213
+
214
+ #pull out all the datasets for a catalog, recursively if need be
215
+ def parse_out_datasets doc, uri, catalog, catalog_hash
216
+ if catalog_hash[catalog] == nil
217
+ catalog_hash[catalog] = []
218
+ end
219
+ links = doc.xpath('//a')
220
+ links.each do |link|
221
+ if link.content.index(uri) != nil && link.content.index(catalog) == nil && link.content.index('fCatalog') == nil
222
+ #its a dataset
223
+ catalog_hash[catalog].push(link.content)
224
+ elsif link.content.index('fCatalog') != nil && link.content.index(catalog) == nil
225
+ #its a new catalog
226
+ url = URI.parse(link.content)
227
+ new_catalog = url.path.split('/').last
228
+ catalog_doc = retrieve_html "http://" + url.host + ':' + url.port.to_s, new_catalog
229
+ parse_out_datasets catalog_doc, url.host, new_catalog, catalog_hash
230
+ end
231
+ end
232
+ return catalog_hash
233
+ end
234
+
235
+ def retrieve_html url, catalog
236
+ uri = URI.parse(url)
237
+ query_string = '/browser/browser?action=LIST&path=ROOT' + CGI.escape('|Properties|children') + "&url=" + CGI.escape('http://') + CGI.escape(uri.host + ':' + uri.port.to_s) + CGI.escape("/obj/fCatalog/" + catalog + "@children")
238
+ full_uri = uri.merge query_string
239
+ res = Net::HTTP.get full_uri
240
+ doc = Nokogiri::HTML(res)
241
+ return doc
242
+ end
243
+
244
+ #information about the variables
245
+ def get_variable_information doc
246
+ variables = []
247
+ variable_info_hash = Hash.new
248
+ docnodes = doc.find('//dataDscr')
249
+ vargroups = docnodes[0].find('//dataDscr/varGrp')
250
+ vargroups.each do |vargroup|
251
+ #hash which holds all the variable groups
252
+ a = vargroup.attributes
253
+ groups = a.get_attribute('var')
254
+ if groups != nil
255
+ groups = a.get_attribute('var')
256
+ variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ')
257
+ # else
258
+ # variable_info_hash[vargroup.find('./labl')[0].first.content] = groups.value.split(' ')
259
+ end
260
+ end
261
+ vars = docnodes[0].find('//dataDscr/var')
262
+ vars.each do |var|
263
+ variable = Nesstar::Variable.new
264
+ var_attr = var.attributes
265
+ variable.id = var_attr.get_attribute('ID').value.strip unless var_attr.get_attribute('ID') == nil
266
+ variable.name = var_attr.get_attribute('name').value.strip unless var_attr.get_attribute('name') == nil
267
+ variable.file = var_attr.get_attribute('files').value.strip unless var_attr.get_attribute('files') == nil
268
+ variable.interval = var_attr.get_attribute('intrvl').value.strip unless var_attr.get_attribute('intrvl') == nil
269
+ variable.label = var.find('./labl')[0].content.strip unless var.find('./labl')[0] == nil
270
+ rng = var.find('./valrng')
271
+ if rng != nil
272
+ if rng[0] != nil
273
+ range_attr = rng[0].first.attributes
274
+ max_val = range_attr.get_attribute('max')
275
+ variable.max = max_val.value.strip unless max_val == nil
276
+ min_val = range_attr.get_attribute('min')
277
+ variable.min = min_val.value.strip unless min_val == nil
278
+ end
279
+ end
280
+ q = var.find('./qstn')
281
+ if q[0] != nil
282
+ ql = q[0].find('./qstnLit')
283
+ if ql != nil
284
+ if ql[0] != nil
285
+ variable.question = ql[0].first.content.strip
286
+ end
287
+ end
288
+ iv = q[0].find('./ivuInstr')
289
+ if iv != nil
290
+ if iv[0] != nil
291
+ variable.interview_instruction = iv[0].first.content.strip
292
+ end
293
+ end
294
+ end
295
+ stats = var.find('./sumStat')
296
+ summary_stats = []
297
+ stats.each do |stat|
298
+ a = stat.attributes
299
+ # summary_stats[a.get_attribute('type').value] = stat.first.content
300
+ statistic = Nesstar::SummaryStat.new
301
+ statistic.type = a.get_attribute('type').value.strip
302
+ statistic.value = stat.first.content.strip
303
+ summary_stats.push(statistic)
304
+ end
305
+ variable.summary_stats = summary_stats
306
+ catgry = var.find('./catgry')
307
+ categories = []
308
+ #categories in ddi are value domains in mb
309
+ catgry.each do |cat|
310
+ category = Nesstar::Category.new
311
+ category.value = cat.find('./catValu').first.content
312
+ labxml = cat.find('./labl')
313
+ if labxml != nil && labxml[0] != nil
314
+ category.label = labxml[0].first.content.strip unless labxml[0].first == nil
315
+ end
316
+ catstat = cat.find('./catStat')
317
+ category_statistics = []
318
+ catstat.each do |cat|
319
+ category_statistic = Nesstar::CategoryStatistic.new
320
+ a = cat.first.attributes unless cat.first == nil
321
+ if a != nil
322
+ category_statistic.type = a.get_attribute('type').strip unless a.get_attribute('type') == nil
323
+ category_statistic.value = catstat.first.content.strip unless catstat.first == nil
324
+ category_statistic.type = a.get_attribute('type').strip unless a.get_attribute('type') == nil
325
+ category_statistics.push(category_statistic)
326
+ end
327
+ end
328
+ category.category_statistics = category_statistics
329
+ categories.push(category_statistics)
330
+ end
331
+ #what group is the variable in
332
+ variable_info_hash.each_key do |key|
333
+ if variable_info_hash[key].include?(variable.id)
334
+ variable.group = key.strip
335
+ break
336
+ end
337
+ end
338
+
339
+ variable.categories = categories
340
+ variables.push(variable)
341
+ end
342
+ return variables
343
+ end
344
+
345
+ end
346
+ end
347
+ end
metadata ADDED
@@ -0,0 +1,123 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nesstar-api
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Ian Dunlop
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-02-24 00:00:00 +00:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - "="
28
+ - !ruby/object:Gem::Version
29
+ hash: 15
30
+ segments:
31
+ - 1
32
+ - 4
33
+ - 4
34
+ version: 1.4.4
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: rubytree
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - "="
44
+ - !ruby/object:Gem::Version
45
+ hash: 61
46
+ segments:
47
+ - 0
48
+ - 8
49
+ - 1
50
+ version: 0.8.1
51
+ type: :runtime
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ name: libxml-ruby
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - "="
60
+ - !ruby/object:Gem::Version
61
+ hash: 27
62
+ segments:
63
+ - 1
64
+ - 1
65
+ - 4
66
+ version: 1.1.4
67
+ type: :runtime
68
+ version_requirements: *id003
69
+ description: This gem provides access to NESSTAR API calls from a data provider
70
+ email:
71
+ - ian.dunlop@manchester.ac.uk
72
+ executables: []
73
+
74
+ extensions: []
75
+
76
+ extra_rdoc_files: []
77
+
78
+ files:
79
+ - lib/nesstar-api/catalog.rb
80
+ - lib/nesstar-api/category.rb
81
+ - lib/nesstar-api/category_statistic.rb
82
+ - lib/nesstar-api/study.rb
83
+ - lib/nesstar-api/study_date.rb
84
+ - lib/nesstar-api/summary_stat.rb
85
+ - lib/nesstar-api/variable.rb
86
+ - lib/nesstar-api/version.rb
87
+ - lib/nesstar-api.rb
88
+ has_rdoc: true
89
+ homepage: http://github.com/mygrid/methodbox/nesstar-api
90
+ licenses: []
91
+
92
+ post_install_message:
93
+ rdoc_options: []
94
+
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ hash: 3
103
+ segments:
104
+ - 0
105
+ version: "0"
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ">="
110
+ - !ruby/object:Gem::Version
111
+ hash: 3
112
+ segments:
113
+ - 0
114
+ version: "0"
115
+ requirements: []
116
+
117
+ rubyforge_project: nesstar-api
118
+ rubygems_version: 1.3.7
119
+ signing_key:
120
+ specification_version: 3
121
+ summary: Simple API for calling NESSTAR API and returning results
122
+ test_files: []
123
+