bel 0.2.1 → 0.3.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/bel_upgrade CHANGED
@@ -2,10 +2,10 @@
2
2
  # bel_upgrade: Upgrade BEL content to a new set of resources.
3
3
  #
4
4
  # From BEL file
5
- # usage: ruby bel_upgrade -b file.bel -c file.json
5
+ # usage: bel_upgrade -b file.bel -c file.json
6
6
  #
7
7
  # From standard in
8
- # usage: echo "<BEL DOCUMENT STRING>" | ruby bel_upgrade -c file.json
8
+ # usage: echo "<BEL DOCUMENT STRING>" | bel_upgrade -c file.json
9
9
 
10
10
  require 'bel'
11
11
  require 'json'
@@ -16,9 +16,9 @@ require 'open-uri'
16
16
  # setup and parse options
17
17
  options = {}
18
18
  OptionParser.new do |opts|
19
- opts.banner = "Usage: upgrade-namespaces.rb [options] [.bel file]"
19
+ opts.banner = "Usage: bel_upgrade [options] [.bel file]"
20
20
  opts.on('-b', '--bel FILE', 'BEL file to upgrade. STDIN (standard in) can also be used for BEL content.') do |bel|
21
- options['bel'] = bel
21
+ options[:bel] = bel
22
22
  end
23
23
  opts.on("-c", "--changelog [FILE | URI]", "Change log JSON") do |change_log|
24
24
  options['change_log'] = change_log
@@ -26,7 +26,7 @@ OptionParser.new do |opts|
26
26
  end.parse!
27
27
 
28
28
  # option guards
29
- unless options['bel'] or not STDIN.tty?
29
+ unless options[:bel] or not STDIN.tty?
30
30
  $stderr.puts "No bel content provided. Either use --bel option or STDIN (standard in). Use -h / --help for details."
31
31
  exit 1
32
32
  end
@@ -47,13 +47,18 @@ if not File.exists? options['change_log']
47
47
  exit 1
48
48
  end
49
49
  end
50
- if options['bel'] and not File.exists? options['bel']
51
- $stderr.puts "No file for bel, #{options['bel']}"
50
+ if options[:bel] and not File.exists? options[:bel]
51
+ $stderr.puts "No file for bel, #{options[:bel]}"
52
52
  exit 1
53
53
  end
54
54
 
55
55
  # read bel content
56
- content = (STDIN.tty?) ? File.open(options['bel']).read : $stdin.read
56
+ content =
57
+ if options[:bel]
58
+ File.open(options[:bel]).read
59
+ else
60
+ $stdin.read
61
+ end
57
62
 
58
63
  # read change log
59
64
  changelog = nil
@@ -74,70 +79,87 @@ end
74
79
  class Main
75
80
 
76
81
  EvidenceMatcher = Regexp.compile(/SET Evidence = ([0-9a-zA-Z]+)/)
82
+ LostReplaceValues = ['unresolved', 'withdrawn']
77
83
  attr_reader :ttl
78
84
 
79
85
  def initialize(content, change_log)
80
86
  @change_log = change_log
87
+ @redefine_section = @change_log['redefine']
81
88
  @keywords_seen = Set.new
82
- parser = BEL::Script::Parser.new
83
- parser.add_observer self
84
- parser.parse(content)
85
- end
86
- def update(obj)
87
- # redefine namespace based on change log's `redefine` block
88
- if obj.is_a? BEL::Script::NamespaceDefinition
89
- if @change_log.has_key? 'redefine'
90
- redefine = @change_log['redefine']
91
- if redefine.has_key? obj.prefix
92
- entry = redefine[obj.prefix]
93
- obj.prefix = entry['new_keyword']
94
- obj.value = entry['new_url']
89
+ BEL::Script.parse(content) do |obj|
90
+ # redefine namespace based on change log's `redefine` block
91
+ if obj.is_a? NamespaceDefinition
92
+ if @change_log.has_key? 'redefine'
93
+ redefine = @change_log['redefine']
94
+ if redefine.has_key? obj.prefix.to_s
95
+ entry = redefine[obj.prefix.to_s]
96
+ new_keyword = entry['new_keyword'].to_sym
97
+ new_url = entry['new_url']
98
+ obj = NamespaceDefinition.new(new_keyword, new_url)
99
+ end
95
100
  end
96
- end
97
101
 
98
- # deduplicate namespaces for output purposes
99
- if @keywords_seen.include? obj.prefix
100
- return
102
+ # deduplicate namespaces for output purposes
103
+ if @keywords_seen.include? obj.prefix
104
+ next
105
+ end
106
+ @keywords_seen.add(obj.prefix)
101
107
  end
102
- @keywords_seen.add(obj.prefix)
103
- end
104
108
 
105
- if obj.is_a? BEL::Script::Annotation
106
- if obj.name == 'Evidence'
107
- ev = obj.to_s
108
- ev.gsub!(EvidenceMatcher, 'SET Evidence = "\1"')
109
- puts ev
110
- return
109
+ # evidence always needs quoting; backwards-compatibility
110
+ if obj.is_a? Annotation
111
+ if obj.name == 'Evidence'
112
+ ev = obj.to_s
113
+ ev.gsub!(EvidenceMatcher, 'SET Evidence = "\1"')
114
+ puts ev.to_s
115
+ next
116
+ end
111
117
  end
112
- end
113
118
 
114
- if obj.is_a? BEL::Script::Parameter
115
- if @change_log.has_key? obj.ns
116
- clog_ns = @change_log[obj.ns]
117
- if clog_ns.has_key? obj.value
118
- replacement = clog_ns[obj.value]
119
- if replacement == 'unresolved' or replacement == 'withdrawn'
120
- $stderr.puts "no replacement value for #{obj.ns} '#{obj.value}' - value '#{replacement}'"
121
- return
119
+ if obj.is_a? Parameter and obj.ns
120
+ # first try replacing by existing namespace prefix...
121
+ prefix = obj.ns.prefix.to_s
122
+ replacements = @change_log[prefix]
123
+ if replacements
124
+ replacement_value = replacements[obj.value]
125
+ if replacement_value
126
+ if LostReplaceValues.include? replacement_value
127
+ $stderr.puts "no replacement value for #{obj.ns} '#{obj.value}' - value '#{replacement_value}'"
128
+ else
129
+ obj.value = replacement_value
130
+ end
122
131
  end
123
- obj.value = replacement
124
132
  end
125
- end
126
- # redefine param namespace based on change log's `redefine` block
127
- if @change_log.has_key? 'redefine'
128
- redefine = @change_log['redefine']
129
- if redefine.has_key? obj.ns
130
- obj.ns = redefine[obj.ns]['new_keyword']
133
+
134
+ # ...then change namespace if redefined...
135
+ if @redefine_section
136
+ redefinition = @redefine_section[prefix]
137
+ if redefinition
138
+ new_prefix = redefinition['new_keyword']
139
+ new_url = redefinition['new_url']
140
+ obj.ns = NamespaceDefinition.new(new_prefix, new_url)
141
+
142
+ # ...and replace value using new namespace prefix
143
+ replacements = @change_log[new_prefix]
144
+ if replacements
145
+ replacement_value = replacements[obj.value]
146
+ if replacement_value
147
+ if LostReplaceValues.include? replacement_value
148
+ $stderr.puts "no replacement value for #{obj.ns} '#{obj.value}' - value '#{replacement_value}'"
149
+ else
150
+ obj.value = replacement_value
151
+ end
152
+ end
153
+ end
154
+ end
131
155
  end
132
156
  end
133
- return
134
- end
135
157
 
136
- if obj.is_a? BEL::Script::Term
137
- return # do not print (part of statement)
158
+ # do not print Parameter and Term; they are included in Statement
159
+ if not obj.is_a? Parameter and not obj.is_a? Term
160
+ puts obj.to_bel
161
+ end
138
162
  end
139
-
140
- puts obj.to_s
141
163
  end
142
164
 
143
165
  private
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+ # bel_upgrade_term: Upgrade BEL terms to a new set of resources.
3
+ #
4
+ # Terms as CLI option.
5
+ # usage: bel_upgrade_term -t "p(HGNC:A2LD1)" -n "1.0" -c change_log.json
6
+ #
7
+ # Terms from standard in.
8
+ # usage: echo -e "p(EGID:84)\np(HGNC:A2LD1)" | bel_upgrade_term -n "1.0" -c change_log.json
9
+ # usage: cat terms.bel | bel_upgrade_term -n "1.0" -c change_log.json
10
+
11
+ require 'bel'
12
+ require 'json'
13
+ require 'optparse'
14
+ require 'set'
15
+ require 'open-uri'
16
+
17
+ # setup and parse options
18
+ options = {}
19
+ OptionParser.new do |opts|
20
+ opts.banner = "Usage: bel_upgrade [options] [.bel file]"
21
+ opts.on('-t', '--terms TERM[\nTERM...]', 'BEL terms to upgrade (line-separated).') do |terms|
22
+ options[:terms] = terms
23
+ end
24
+ opts.on('-n', '--nsversion NAMESPACE VERSION', 'Assume BEL is described by this namespace version.') do |nsv|
25
+ options[:namespace_version] = nsv
26
+ end
27
+ opts.on("-c", "--changelog [FILE | URI]", "Change log JSON.") do |change_log|
28
+ options['change_log'] = change_log
29
+ end
30
+ end.parse!
31
+
32
+ # option guards
33
+ if not options[:namespace_version]
34
+ $stderr.puts "The --nsversion option is required. Use -h / --help for details."
35
+ exit 1
36
+ end
37
+ unless options[:terms] or not STDIN.tty?
38
+ $stderr.puts "No bel terms provided; try --term, or STDIN (standard in). Use -h / --help for details."
39
+ exit 1
40
+ end
41
+ unless options['change_log']
42
+ $stderr.puts "Missing --changelog option. Use -h / --help for details."
43
+ exit 1
44
+ end
45
+ if not File.exists? options['change_log']
46
+ begin
47
+ open(options['change_log']) do |f|
48
+ unless f.content_type == 'application/json'
49
+ $stderr.puts "Expected application/json content type, actual: #{f.content_type}"
50
+ exit 1
51
+ end
52
+ end
53
+ rescue OpenURI::HTTPError => e
54
+ $stderr.puts "Cannot read URI for change_log, #{options['change_log']}, status: #{e}"
55
+ exit 1
56
+ end
57
+ end
58
+
59
+ # read bel content
60
+ content =
61
+ if options[:terms]
62
+ options[:terms]
63
+ else
64
+ $stdin.read
65
+ end
66
+
67
+ if content.strip().empty?
68
+ $stderr.puts "Empty bel term content. Check input."
69
+ end
70
+ content += "\n"
71
+
72
+ # read change log
73
+ changelog = nil
74
+ if File.exists? options['change_log']
75
+ File.open(options['change_log']) do |f|
76
+ changelog = JSON.parse(f.read)
77
+ end
78
+ else
79
+ open(options['change_log']) do |file|
80
+ changelog = JSON.parse(file.read)
81
+ end
82
+ end
83
+
84
+ unless changelog
85
+ $stderr.puts "Cannot retrieve change_log #{options['change_log']}"
86
+ end
87
+
88
+ class Main
89
+
90
+ LostReplaceValues = ['unresolved', 'withdrawn']
91
+ attr_reader :ttl
92
+
93
+ def initialize(nsversion, content, change_log)
94
+ @change_log = change_log
95
+ @redefine_section = @change_log['redefine']
96
+ resource_index = ResourceIndex.openbel_published_index(nsversion)
97
+ content.each_line do |line|
98
+ parsed_objects = BEL::Script.parse(line, resource_index).to_a
99
+
100
+ if parsed_objects.empty?
101
+ $stderr.puts "parse failure for '#{line.strip}': outputting original"
102
+ puts line
103
+ else
104
+ parsed_objects.each do |obj|
105
+ if obj.is_a? Parameter and obj.ns
106
+ # first try replacing by existing namespace prefix...
107
+ prefix = obj.ns.prefix.to_s
108
+ replacements = @change_log[prefix]
109
+ if replacements
110
+ replacement_value = replacements[obj.value]
111
+ if replacement_value
112
+ if LostReplaceValues.include? replacement_value
113
+ $stderr.puts "no replacement value for #{obj.ns} '#{obj.value}' - value '#{replacement_value}'"
114
+ else
115
+ obj.value = replacement_value
116
+ end
117
+ end
118
+ end
119
+
120
+ # ...then change namespace if redefined...
121
+ if @redefine_section
122
+ redefinition = @redefine_section[prefix]
123
+ if redefinition
124
+ new_prefix = redefinition['new_keyword']
125
+ new_url = redefinition['new_url']
126
+ obj.ns = NamespaceDefinition.new(new_prefix, new_url)
127
+
128
+ # ...and replace value using new namespace prefix
129
+ replacements = @change_log[new_prefix]
130
+ if replacements
131
+ replacement_value = replacements[obj.value]
132
+ if replacement_value
133
+ if LostReplaceValues.include? replacement_value
134
+ $stderr.puts "no replacement value for #{obj.ns} '#{obj.value}' - value '#{replacement_value}'"
135
+ else
136
+ obj.value = replacement_value
137
+ end
138
+ end
139
+ end
140
+ end
141
+ end
142
+ end
143
+
144
+ # do not print Parameter and Term; they are included in Statement
145
+ if obj.is_a? Statement
146
+ puts obj.to_bel
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end
152
+
153
+ private
154
+
155
+ def error_file(file_name)
156
+ $stderr.puts "#{file_name} is not readable"
157
+ exit 1
158
+ end
159
+ end
160
+
161
+ Main.new(options[:namespace_version], content, changelog)
162
+ # vim: ts=2 sw=2:
163
+ # encoding: utf-8
data/ext/mri/bel-ast.c ADDED
@@ -0,0 +1,221 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <stdio.h>
4
+ #include "bel-ast.h"
5
+
6
+ bel_ast_node* bel_new_ast_node_token(bel_ast_token_type type) {
7
+ bel_ast_node* node;
8
+ node = malloc(sizeof(bel_ast_node));
9
+ node->token = malloc(sizeof(bel_ast_node_token));
10
+ node->token->type = BEL_TOKEN;
11
+ node->token->ttype = type;
12
+ node->token->left = NULL;
13
+ node->token->right = NULL;
14
+ return node;
15
+ };
16
+
17
+ bel_ast_node* bel_new_ast_node_value(bel_ast_value_type type, char* value) {
18
+ bel_ast_node* node;
19
+ char* copy_value;
20
+
21
+ if (value) {
22
+ copy_value = malloc(strlen(value) + 1);
23
+ strcpy(copy_value, value);
24
+ } else {
25
+ copy_value = NULL;
26
+ }
27
+
28
+ node = malloc(sizeof(bel_ast_node));
29
+ node->value = malloc(sizeof(bel_ast_node_value));
30
+ node->value->type = BEL_VALUE;
31
+ node->value->vtype = type;
32
+ node->value->value = copy_value;
33
+ return node;
34
+ };
35
+
36
+ bel_ast_node* bel_copy_ast_node(bel_ast_node* node) {
37
+ bel_ast_node* copy_node;
38
+ char* copy_value;
39
+
40
+ if (!node) {
41
+ return NULL;
42
+ }
43
+
44
+ copy_node = malloc(sizeof(bel_ast_node));
45
+
46
+ if (node->type_info->type == BEL_VALUE) {
47
+ if (node->value->value) {
48
+ copy_value = malloc(strlen(node->value->value) + 1);
49
+ strcpy(copy_value, node->value->value);
50
+ } else {
51
+ copy_value = NULL;
52
+ }
53
+
54
+ copy_node->value = malloc(sizeof(bel_ast_node_value));
55
+ copy_node->value->type = node->value->type;
56
+ copy_node->value->vtype = node->value->vtype;
57
+ copy_node->value->value = copy_value;
58
+ } else {
59
+ copy_node->token = malloc(sizeof(bel_ast_node_token));
60
+ copy_node->token->type = node->token->type;
61
+ copy_node->token->ttype = node->token->ttype;
62
+ copy_node->token->left = bel_copy_ast_node(node->token->left);
63
+ copy_node->token->right = bel_copy_ast_node(node->token->right);
64
+ }
65
+
66
+ return copy_node;
67
+ }
68
+
69
+ bel_ast_node* bel_set_value(bel_ast_node* node, char* value) {
70
+ char* copy_value;
71
+
72
+ if (!node) {
73
+ // TODO Debug node error to stderr; node is NULL
74
+ return NULL;
75
+ }
76
+
77
+ if (node->type_info->type != BEL_VALUE) {
78
+ // TODO Debug node error to stderr; node cannot hold value
79
+ return NULL;
80
+ }
81
+
82
+ if (value) {
83
+ copy_value = malloc(strlen(value) + 1);
84
+ strcpy(copy_value, value);
85
+ } else {
86
+ copy_value = NULL;
87
+ }
88
+
89
+ node->value->value = copy_value;
90
+ return node;
91
+ }
92
+
93
+ bel_ast* bel_new_ast() {
94
+ bel_ast* ast;
95
+ ast = malloc(sizeof(bel_ast));
96
+ ast->root = NULL;
97
+ return ast;
98
+ };
99
+
100
+ void bel_free_ast(bel_ast* ast) {
101
+ if (!ast) {
102
+ return;
103
+ }
104
+ bel_free_ast_node(ast->root);
105
+ free(ast);
106
+ };
107
+
108
+ void bel_free_ast_node(bel_ast_node* node) {
109
+ if (node->type_info->type == BEL_TOKEN) {
110
+ if (node->token->left != NULL) {
111
+ bel_free_ast_node(node->token->left);
112
+ }
113
+ if (node->token->right != NULL) {
114
+ bel_free_ast_node(node->token->right);
115
+ }
116
+ free(node->token);
117
+ } else {
118
+ free(node->value->value);
119
+ free(node->value);
120
+ }
121
+
122
+ free(node);
123
+ };
124
+
125
+ void bel_print_ast_node(bel_ast_node* node, char* tree_flat_string) {
126
+ char val[1024 * 32];
127
+ if (node == NULL) {
128
+ strcat(tree_flat_string, "(null) ");
129
+ return;
130
+ }
131
+
132
+ switch(node->type_info->type) {
133
+ case BEL_TOKEN:
134
+ switch(node->type_info->ttype) {
135
+ case BEL_TOKEN_ARG:
136
+ strcat(tree_flat_string, "ARG ");
137
+ break;
138
+ case BEL_TOKEN_NV:
139
+ strcat(tree_flat_string, "NV ");
140
+ break;
141
+ case BEL_TOKEN_TERM:
142
+ strcat(tree_flat_string, "TERM ");
143
+ break;
144
+ case BEL_TOKEN_SUBJECT:
145
+ strcat(tree_flat_string, "SUBJECT ");
146
+ break;
147
+ case BEL_TOKEN_REL:
148
+ strcat(tree_flat_string, "REL ");
149
+ break;
150
+ case BEL_TOKEN_OBJECT:
151
+ strcat(tree_flat_string, "OBJECT ");
152
+ break;
153
+ case BEL_TOKEN_STATEMENT:
154
+ strcat(tree_flat_string, "STATEMENT ");
155
+ break;
156
+ };
157
+ bel_print_ast_node(node->token->left, tree_flat_string);
158
+ bel_print_ast_node(node->token->right, tree_flat_string);
159
+ break;
160
+ case BEL_VALUE:
161
+ switch(node->type_info->vtype) {
162
+ case BEL_VALUE_FX:
163
+ if (node->value->value == NULL) {
164
+ strcat(tree_flat_string, "fx((null)) ");
165
+ } else {
166
+ sprintf(val, "fx(%s) ", node->value->value);
167
+ strcat(tree_flat_string, val);
168
+ }
169
+ break;
170
+ case BEL_VALUE_REL:
171
+ if (node->value->value == NULL) {
172
+ strcat(tree_flat_string, "rel((null)) ");
173
+ } else {
174
+ sprintf(val, "rel(%s) ", node->value->value);
175
+ strcat(tree_flat_string, val);
176
+ }
177
+ break;
178
+ case BEL_VALUE_PFX:
179
+ if (node->value->value == NULL) {
180
+ strcat(tree_flat_string, "pfx((null)) ");
181
+ } else {
182
+ sprintf(val, "pfx(%s) ", node->value->value);
183
+ strcat(tree_flat_string, val);
184
+ }
185
+ break;
186
+ case BEL_VALUE_VAL:
187
+ if (node->value->value == NULL) {
188
+ strcat(tree_flat_string, "val((null)) ");
189
+ } else {
190
+ sprintf(val, "val(%s) ", node->value->value);
191
+ strcat(tree_flat_string, val);
192
+ }
193
+ break;
194
+ };
195
+ break;
196
+ };
197
+ };
198
+
199
+ void bel_print_ast(bel_ast* ast) {
200
+ char tree_flat_string[1024 * 32];
201
+
202
+ if (!ast) {
203
+ return;
204
+ }
205
+
206
+ memset(tree_flat_string, '\0', 1024 * 32);
207
+ bel_print_ast_node(ast->root, tree_flat_string);
208
+ fprintf(stdout, "%s\n", tree_flat_string);
209
+ };
210
+
211
+ char* bel_ast_as_string(bel_ast* ast) {
212
+ char *tree_flat_string;
213
+
214
+ if (!ast) {
215
+ return NULL;
216
+ }
217
+
218
+ tree_flat_string = calloc(1024 * 32, 1);
219
+ bel_print_ast_node(ast->root, tree_flat_string);
220
+ return tree_flat_string;
221
+ };