bel 0.2.1 → 0.3.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
data/bin/bel_upgrade CHANGED
@@ -2,10 +2,10 @@
2
2
  # bel_upgrade: Upgrade BEL content to a new set of resources.
3
3
  #
4
4
  # From BEL file
5
- # usage: ruby bel_upgrade -b file.bel -c file.json
5
+ # usage: bel_upgrade -b file.bel -c file.json
6
6
  #
7
7
  # From standard in
8
- # usage: echo "<BEL DOCUMENT STRING>" | ruby bel_upgrade -c file.json
8
+ # usage: echo "<BEL DOCUMENT STRING>" | bel_upgrade -c file.json
9
9
 
10
10
  require 'bel'
11
11
  require 'json'
@@ -16,9 +16,9 @@ require 'open-uri'
16
16
  # setup and parse options
17
17
  options = {}
18
18
  OptionParser.new do |opts|
19
- opts.banner = "Usage: upgrade-namespaces.rb [options] [.bel file]"
19
+ opts.banner = "Usage: bel_upgrade [options] [.bel file]"
20
20
  opts.on('-b', '--bel FILE', 'BEL file to upgrade. STDIN (standard in) can also be used for BEL content.') do |bel|
21
- options['bel'] = bel
21
+ options[:bel] = bel
22
22
  end
23
23
  opts.on("-c", "--changelog [FILE | URI]", "Change log JSON") do |change_log|
24
24
  options['change_log'] = change_log
@@ -26,7 +26,7 @@ OptionParser.new do |opts|
26
26
  end.parse!
27
27
 
28
28
  # option guards
29
- unless options['bel'] or not STDIN.tty?
29
+ unless options[:bel] or not STDIN.tty?
30
30
  $stderr.puts "No bel content provided. Either use --bel option or STDIN (standard in). Use -h / --help for details."
31
31
  exit 1
32
32
  end
@@ -47,13 +47,18 @@ if not File.exists? options['change_log']
47
47
  exit 1
48
48
  end
49
49
  end
50
- if options['bel'] and not File.exists? options['bel']
51
- $stderr.puts "No file for bel, #{options['bel']}"
50
+ if options[:bel] and not File.exists? options[:bel]
51
+ $stderr.puts "No file for bel, #{options[:bel]}"
52
52
  exit 1
53
53
  end
54
54
 
55
55
  # read bel content
56
- content = (STDIN.tty?) ? File.open(options['bel']).read : $stdin.read
56
+ content =
57
+ if options[:bel]
58
+ File.open(options[:bel]).read
59
+ else
60
+ $stdin.read
61
+ end
57
62
 
58
63
  # read change log
59
64
  changelog = nil
@@ -74,70 +79,87 @@ end
74
79
  class Main
75
80
 
76
81
  EvidenceMatcher = Regexp.compile(/SET Evidence = ([0-9a-zA-Z]+)/)
82
+ LostReplaceValues = ['unresolved', 'withdrawn']
77
83
  attr_reader :ttl
78
84
 
79
85
  def initialize(content, change_log)
80
86
  @change_log = change_log
87
+ @redefine_section = @change_log['redefine']
81
88
  @keywords_seen = Set.new
82
- parser = BEL::Script::Parser.new
83
- parser.add_observer self
84
- parser.parse(content)
85
- end
86
- def update(obj)
87
- # redefine namespace based on change log's `redefine` block
88
- if obj.is_a? BEL::Script::NamespaceDefinition
89
- if @change_log.has_key? 'redefine'
90
- redefine = @change_log['redefine']
91
- if redefine.has_key? obj.prefix
92
- entry = redefine[obj.prefix]
93
- obj.prefix = entry['new_keyword']
94
- obj.value = entry['new_url']
89
+ BEL::Script.parse(content) do |obj|
90
+ # redefine namespace based on change log's `redefine` block
91
+ if obj.is_a? NamespaceDefinition
92
+ if @change_log.has_key? 'redefine'
93
+ redefine = @change_log['redefine']
94
+ if redefine.has_key? obj.prefix.to_s
95
+ entry = redefine[obj.prefix.to_s]
96
+ new_keyword = entry['new_keyword'].to_sym
97
+ new_url = entry['new_url']
98
+ obj = NamespaceDefinition.new(new_keyword, new_url)
99
+ end
95
100
  end
96
- end
97
101
 
98
- # deduplicate namespaces for output purposes
99
- if @keywords_seen.include? obj.prefix
100
- return
102
+ # deduplicate namespaces for output purposes
103
+ if @keywords_seen.include? obj.prefix
104
+ next
105
+ end
106
+ @keywords_seen.add(obj.prefix)
101
107
  end
102
- @keywords_seen.add(obj.prefix)
103
- end
104
108
 
105
- if obj.is_a? BEL::Script::Annotation
106
- if obj.name == 'Evidence'
107
- ev = obj.to_s
108
- ev.gsub!(EvidenceMatcher, 'SET Evidence = "\1"')
109
- puts ev
110
- return
109
+ # evidence always needs quoting; backwards-compatibility
110
+ if obj.is_a? Annotation
111
+ if obj.name == 'Evidence'
112
+ ev = obj.to_s
113
+ ev.gsub!(EvidenceMatcher, 'SET Evidence = "\1"')
114
+ puts ev.to_s
115
+ next
116
+ end
111
117
  end
112
- end
113
118
 
114
- if obj.is_a? BEL::Script::Parameter
115
- if @change_log.has_key? obj.ns
116
- clog_ns = @change_log[obj.ns]
117
- if clog_ns.has_key? obj.value
118
- replacement = clog_ns[obj.value]
119
- if replacement == 'unresolved' or replacement == 'withdrawn'
120
- $stderr.puts "no replacement value for #{obj.ns} '#{obj.value}' - value '#{replacement}'"
121
- return
119
+ if obj.is_a? Parameter and obj.ns
120
+ # first try replacing by existing namespace prefix...
121
+ prefix = obj.ns.prefix.to_s
122
+ replacements = @change_log[prefix]
123
+ if replacements
124
+ replacement_value = replacements[obj.value]
125
+ if replacement_value
126
+ if LostReplaceValues.include? replacement_value
127
+ $stderr.puts "no replacement value for #{obj.ns} '#{obj.value}' - value '#{replacement_value}'"
128
+ else
129
+ obj.value = replacement_value
130
+ end
122
131
  end
123
- obj.value = replacement
124
132
  end
125
- end
126
- # redefine param namespace based on change log's `redefine` block
127
- if @change_log.has_key? 'redefine'
128
- redefine = @change_log['redefine']
129
- if redefine.has_key? obj.ns
130
- obj.ns = redefine[obj.ns]['new_keyword']
133
+
134
+ # ...then change namespace if redefined...
135
+ if @redefine_section
136
+ redefinition = @redefine_section[prefix]
137
+ if redefinition
138
+ new_prefix = redefinition['new_keyword']
139
+ new_url = redefinition['new_url']
140
+ obj.ns = NamespaceDefinition.new(new_prefix, new_url)
141
+
142
+ # ...and replace value using new namespace prefix
143
+ replacements = @change_log[new_prefix]
144
+ if replacements
145
+ replacement_value = replacements[obj.value]
146
+ if replacement_value
147
+ if LostReplaceValues.include? replacement_value
148
+ $stderr.puts "no replacement value for #{obj.ns} '#{obj.value}' - value '#{replacement_value}'"
149
+ else
150
+ obj.value = replacement_value
151
+ end
152
+ end
153
+ end
154
+ end
131
155
  end
132
156
  end
133
- return
134
- end
135
157
 
136
- if obj.is_a? BEL::Script::Term
137
- return # do not print (part of statement)
158
+ # do not print Parameter and Term; they are included in Statement
159
+ if not obj.is_a? Parameter and not obj.is_a? Term
160
+ puts obj.to_bel
161
+ end
138
162
  end
139
-
140
- puts obj.to_s
141
163
  end
142
164
 
143
165
  private
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+ # bel_upgrade_term: Upgrade BEL terms to a new set of resources.
3
+ #
4
+ # Terms as CLI option.
5
+ # usage: bel_upgrade_term -t "p(HGNC:A2LD1)" -n "1.0" -c change_log.json
6
+ #
7
+ # Terms from standard in.
8
+ # usage: echo -e "p(EGID:84)\np(HGNC:A2LD1)" | bel_upgrade_term -n "1.0" -c change_log.json
9
+ # usage: cat terms.bel | bel_upgrade_term -n "1.0" -c change_log.json
10
+
11
+ require 'bel'
12
+ require 'json'
13
+ require 'optparse'
14
+ require 'set'
15
+ require 'open-uri'
16
+
17
# setup and parse options
#
# Collected options:
#   options[:terms]             - BEL terms given on the command line (-t)
#   options[:namespace_version] - namespace version the terms are written against (-n)
#   options['change_log']       - file path or URI of the change log JSON (-c)
options = {}
OptionParser.new do |opts|
  # The banner must name this executable; it is shown by -h / --help.
  opts.banner = "Usage: bel_upgrade_term [options]"
  opts.on('-t', '--terms TERM[\nTERM...]', 'BEL terms to upgrade (line-separated).') do |terms|
    options[:terms] = terms
  end
  opts.on('-n', '--nsversion NAMESPACE VERSION', 'Assume BEL is described by this namespace version.') do |nsv|
    options[:namespace_version] = nsv
  end
  opts.on("-c", "--changelog [FILE | URI]", "Change log JSON.") do |change_log|
    options['change_log'] = change_log
  end
end.parse!
31
+
32
# option guards
#
# Each guard prints a message to standard error and exits with status 1 when
# a required input is missing or unusable.
unless options[:namespace_version]
  $stderr.puts "The --nsversion option is required. Use -h / --help for details."
  exit 1
end

# Terms must come from --terms or be piped in on standard in.
if options[:terms].nil? && STDIN.tty?
  $stderr.puts "No bel terms provided; try --terms, or STDIN (standard in). Use -h / --help for details."
  exit 1
end

unless options['change_log']
  $stderr.puts "Missing --changelog option. Use -h / --help for details."
  exit 1
end

# When the change log is not a local file, treat it as a URI and verify that
# it serves JSON. File.exist? replaces File.exists? (removed in Ruby 3.2) and
# URI#open replaces Kernel#open (no longer opens URIs since Ruby 3.0, and
# unsafe on user input because of "|command" strings).
unless File.exist? options['change_log']
  begin
    uri = URI.parse(options['change_log'])
    unless uri.respond_to?(:open)
      $stderr.puts "No file or readable URI for change_log, #{options['change_log']}"
      exit 1
    end
    uri.open do |f|
      unless f.content_type == 'application/json'
        $stderr.puts "Expected application/json content type, actual: #{f.content_type}"
        exit 1
      end
    end
  rescue URI::InvalidURIError
    $stderr.puts "No file or readable URI for change_log, #{options['change_log']}"
    exit 1
  rescue OpenURI::HTTPError => e
    $stderr.puts "Cannot read URI for change_log, #{options['change_log']}, status: #{e}"
    exit 1
  end
end
58
+
59
# read bel content: terms given with --terms win, otherwise consume standard in
content = options[:terms] || $stdin.read

# An empty input is only reported; processing continues with what was given.
$stderr.puts "Empty bel term content. Check input." if content.strip.empty?

# Guarantee the final term is newline-terminated before line-wise parsing.
content += "\n"
71
+
72
# read change log
#
# The change log is parsed as JSON from a local file when one exists at the
# given path, otherwise it is fetched as a URI. File.exist? replaces
# File.exists? (removed in Ruby 3.2) and URI#open replaces Kernel#open, which
# no longer opens URIs since Ruby 3.0.
changelog = nil
if File.exist? options['change_log']
  File.open(options['change_log']) do |f|
    changelog = JSON.parse(f.read)
  end
else
  URI.parse(options['change_log']).open do |file|
    changelog = JSON.parse(file.read)
  end
end

# Without a change log nothing can be upgraded; stop here instead of failing
# later on a nil change log.
unless changelog
  $stderr.puts "Cannot retrieve change_log #{options['change_log']}"
  exit 1
end
87
+
88
# Upgrades BEL terms against a change log.
#
# Constructing an instance does all the work: every line of +content+ is
# parsed, namespace values (and redefined namespaces) of each parameter are
# replaced according to the change log, and each resulting statement is
# printed to standard out. Lines that fail to parse are echoed unchanged and
# reported on standard error.
class Main

  # Change log values that mean "there is no usable replacement".
  LostReplaceValues = ['unresolved', 'withdrawn'].freeze
  attr_reader :ttl

  # nsversion  - namespace version passed to ResourceIndex.openbel_published_index
  # content    - String of line-separated BEL terms
  # change_log - Hash parsed from the change log JSON; maps a namespace prefix
  #              to a Hash of old value => new value, plus an optional
  #              'redefine' section keyed by namespace prefix
  def initialize(nsversion, content, change_log)
    @change_log = change_log
    @redefine_section = @change_log['redefine']
    resource_index = ResourceIndex.openbel_published_index(nsversion)

    content.each_line do |line|
      parsed_objects = BEL::Script.parse(line, resource_index).to_a

      if parsed_objects.empty?
        $stderr.puts "parse failure for '#{line.strip}': outputting original"
        puts line
        next
      end

      parsed_objects.each do |obj|
        upgrade_parameter(obj) if obj.is_a?(Parameter) && obj.ns

        # do not print Parameter and Term; they are included in Statement
        puts obj.to_bel if obj.is_a? Statement
      end
    end
  end

  private

  # Upgrade one namespaced parameter in place: replace its value using the
  # current namespace prefix, then, if that namespace is redefined, switch to
  # the new namespace and replace the value again using the new prefix.
  def upgrade_parameter(param)
    prefix = param.ns.prefix.to_s
    replace_value(param, prefix)

    return unless @redefine_section
    redefinition = @redefine_section[prefix]
    return unless redefinition

    new_prefix = redefinition['new_keyword']
    new_url = redefinition['new_url']
    param.ns = NamespaceDefinition.new(new_prefix, new_url)
    replace_value(param, new_prefix)
  end

  # Replace param's value with the change log entry found under +prefix+.
  # Entries marked as lost are reported on standard error and leave the
  # value untouched.
  def replace_value(param, prefix)
    replacements = @change_log[prefix]
    return unless replacements

    replacement_value = replacements[param.value]
    return unless replacement_value

    if LostReplaceValues.include? replacement_value
      $stderr.puts "no replacement value for #{param.ns} '#{param.value}' - value '#{replacement_value}'"
    else
      param.value = replacement_value
    end
  end

  # Report an unreadable file on standard error and exit with status 1.
  def error_file(file_name)
    $stderr.puts "#{file_name} is not readable"
    exit 1
  end
end
160
+
161
+ Main.new(options[:namespace_version], content, changelog)
162
+ # vim: ts=2 sw=2:
163
+ # encoding: utf-8
data/ext/mri/bel-ast.c ADDED
@@ -0,0 +1,221 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <stdio.h>
4
+ #include "bel-ast.h"
5
+
6
+ bel_ast_node* bel_new_ast_node_token(bel_ast_token_type type) {
7
+ bel_ast_node* node;
8
+ node = malloc(sizeof(bel_ast_node));
9
+ node->token = malloc(sizeof(bel_ast_node_token));
10
+ node->token->type = BEL_TOKEN;
11
+ node->token->ttype = type;
12
+ node->token->left = NULL;
13
+ node->token->right = NULL;
14
+ return node;
15
+ };
16
+
17
+ bel_ast_node* bel_new_ast_node_value(bel_ast_value_type type, char* value) {
18
+ bel_ast_node* node;
19
+ char* copy_value;
20
+
21
+ if (value) {
22
+ copy_value = malloc(strlen(value) + 1);
23
+ strcpy(copy_value, value);
24
+ } else {
25
+ copy_value = NULL;
26
+ }
27
+
28
+ node = malloc(sizeof(bel_ast_node));
29
+ node->value = malloc(sizeof(bel_ast_node_value));
30
+ node->value->type = BEL_VALUE;
31
+ node->value->vtype = type;
32
+ node->value->value = copy_value;
33
+ return node;
34
+ };
35
+
36
+ bel_ast_node* bel_copy_ast_node(bel_ast_node* node) {
37
+ bel_ast_node* copy_node;
38
+ char* copy_value;
39
+
40
+ if (!node) {
41
+ return NULL;
42
+ }
43
+
44
+ copy_node = malloc(sizeof(bel_ast_node));
45
+
46
+ if (node->type_info->type == BEL_VALUE) {
47
+ if (node->value->value) {
48
+ copy_value = malloc(strlen(node->value->value) + 1);
49
+ strcpy(copy_value, node->value->value);
50
+ } else {
51
+ copy_value = NULL;
52
+ }
53
+
54
+ copy_node->value = malloc(sizeof(bel_ast_node_value));
55
+ copy_node->value->type = node->value->type;
56
+ copy_node->value->vtype = node->value->vtype;
57
+ copy_node->value->value = copy_value;
58
+ } else {
59
+ copy_node->token = malloc(sizeof(bel_ast_node_token));
60
+ copy_node->token->type = node->token->type;
61
+ copy_node->token->ttype = node->token->ttype;
62
+ copy_node->token->left = bel_copy_ast_node(node->token->left);
63
+ copy_node->token->right = bel_copy_ast_node(node->token->right);
64
+ }
65
+
66
+ return copy_node;
67
+ }
68
+
69
+ bel_ast_node* bel_set_value(bel_ast_node* node, char* value) {
70
+ char* copy_value;
71
+
72
+ if (!node) {
73
+ // TODO Debug node error to stderr; node is NULL
74
+ return NULL;
75
+ }
76
+
77
+ if (node->type_info->type != BEL_VALUE) {
78
+ // TODO Debug node error to stderr; node cannot hold value
79
+ return NULL;
80
+ }
81
+
82
+ if (value) {
83
+ copy_value = malloc(strlen(value) + 1);
84
+ strcpy(copy_value, value);
85
+ } else {
86
+ copy_value = NULL;
87
+ }
88
+
89
+ node->value->value = copy_value;
90
+ return node;
91
+ }
92
+
93
+ bel_ast* bel_new_ast() {
94
+ bel_ast* ast;
95
+ ast = malloc(sizeof(bel_ast));
96
+ ast->root = NULL;
97
+ return ast;
98
+ };
99
+
100
+ void bel_free_ast(bel_ast* ast) {
101
+ if (!ast) {
102
+ return;
103
+ }
104
+ bel_free_ast_node(ast->root);
105
+ free(ast);
106
+ };
107
+
108
+ void bel_free_ast_node(bel_ast_node* node) {
109
+ if (node->type_info->type == BEL_TOKEN) {
110
+ if (node->token->left != NULL) {
111
+ bel_free_ast_node(node->token->left);
112
+ }
113
+ if (node->token->right != NULL) {
114
+ bel_free_ast_node(node->token->right);
115
+ }
116
+ free(node->token);
117
+ } else {
118
+ free(node->value->value);
119
+ free(node->value);
120
+ }
121
+
122
+ free(node);
123
+ };
124
+
125
+ void bel_print_ast_node(bel_ast_node* node, char* tree_flat_string) {
126
+ char val[1024 * 32];
127
+ if (node == NULL) {
128
+ strcat(tree_flat_string, "(null) ");
129
+ return;
130
+ }
131
+
132
+ switch(node->type_info->type) {
133
+ case BEL_TOKEN:
134
+ switch(node->type_info->ttype) {
135
+ case BEL_TOKEN_ARG:
136
+ strcat(tree_flat_string, "ARG ");
137
+ break;
138
+ case BEL_TOKEN_NV:
139
+ strcat(tree_flat_string, "NV ");
140
+ break;
141
+ case BEL_TOKEN_TERM:
142
+ strcat(tree_flat_string, "TERM ");
143
+ break;
144
+ case BEL_TOKEN_SUBJECT:
145
+ strcat(tree_flat_string, "SUBJECT ");
146
+ break;
147
+ case BEL_TOKEN_REL:
148
+ strcat(tree_flat_string, "REL ");
149
+ break;
150
+ case BEL_TOKEN_OBJECT:
151
+ strcat(tree_flat_string, "OBJECT ");
152
+ break;
153
+ case BEL_TOKEN_STATEMENT:
154
+ strcat(tree_flat_string, "STATEMENT ");
155
+ break;
156
+ };
157
+ bel_print_ast_node(node->token->left, tree_flat_string);
158
+ bel_print_ast_node(node->token->right, tree_flat_string);
159
+ break;
160
+ case BEL_VALUE:
161
+ switch(node->type_info->vtype) {
162
+ case BEL_VALUE_FX:
163
+ if (node->value->value == NULL) {
164
+ strcat(tree_flat_string, "fx((null)) ");
165
+ } else {
166
+ sprintf(val, "fx(%s) ", node->value->value);
167
+ strcat(tree_flat_string, val);
168
+ }
169
+ break;
170
+ case BEL_VALUE_REL:
171
+ if (node->value->value == NULL) {
172
+ strcat(tree_flat_string, "rel((null)) ");
173
+ } else {
174
+ sprintf(val, "rel(%s) ", node->value->value);
175
+ strcat(tree_flat_string, val);
176
+ }
177
+ break;
178
+ case BEL_VALUE_PFX:
179
+ if (node->value->value == NULL) {
180
+ strcat(tree_flat_string, "pfx((null)) ");
181
+ } else {
182
+ sprintf(val, "pfx(%s) ", node->value->value);
183
+ strcat(tree_flat_string, val);
184
+ }
185
+ break;
186
+ case BEL_VALUE_VAL:
187
+ if (node->value->value == NULL) {
188
+ strcat(tree_flat_string, "val((null)) ");
189
+ } else {
190
+ sprintf(val, "val(%s) ", node->value->value);
191
+ strcat(tree_flat_string, val);
192
+ }
193
+ break;
194
+ };
195
+ break;
196
+ };
197
+ };
198
+
199
+ void bel_print_ast(bel_ast* ast) {
200
+ char tree_flat_string[1024 * 32];
201
+
202
+ if (!ast) {
203
+ return;
204
+ }
205
+
206
+ memset(tree_flat_string, '\0', 1024 * 32);
207
+ bel_print_ast_node(ast->root, tree_flat_string);
208
+ fprintf(stdout, "%s\n", tree_flat_string);
209
+ };
210
+
211
+ char* bel_ast_as_string(bel_ast* ast) {
212
+ char *tree_flat_string;
213
+
214
+ if (!ast) {
215
+ return NULL;
216
+ }
217
+
218
+ tree_flat_string = calloc(1024 * 32, 1);
219
+ bel_print_ast_node(ast->root, tree_flat_string);
220
+ return tree_flat_string;
221
+ };