pbf_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,665 @@
1
+ #include "pbf_parser.h"
2
+
3
+ /*
4
+ Set string encoding to UTF8
5
+ See http://tenderlovemaking.com/2009/06/26/string-encoding-in-ruby-1-9-c-extensions.html
6
+ */
7
+ static VALUE str_new(const char *str) {
8
+ VALUE string = rb_str_new2(str);
9
+
10
+ #ifdef HAVE_RUBY_ENCODING_H
11
+ int enc = rb_enc_find_index("UTF-8");
12
+ if(enc != -1) rb_enc_associate_index(string, enc);
13
+ #endif
14
+
15
+ return string;
16
+ }
17
+
18
/*
  Read the 4-byte length prefix that precedes a blob header.

  The prefix is stored in network byte order (big-endian). It is decoded
  byte by byte so that exactly four bytes are read from the buffer,
  regardless of the width, alignment or endianness of the host's integer
  types.

  Returns the decoded length, or 0 when four bytes could not be read
  (end of file or read error).
*/
static size_t get_header_size(FILE *input)
{
  unsigned char bytes[4];

  if(fread(bytes, sizeof(bytes), 1, input) != 1)
    return 0;

  /* unsigned long is guaranteed to hold at least 32 bits. */
  unsigned long size = ((unsigned long)bytes[0] << 24) |
                       ((unsigned long)bytes[1] << 16) |
                       ((unsigned long)bytes[2] << 8)  |
                        (unsigned long)bytes[3];

  return (size_t)size;
}
27
+
28
+ static char *parse_binary_str(ProtobufCBinaryData bstr)
29
+ {
30
+ char *str = calloc(bstr.len + 1, 1);
31
+ memcpy(str, bstr.data, bstr.len);
32
+
33
+ return str;
34
+ }
35
+
36
+ static BlobHeader *read_blob_header(FILE *input)
37
+ {
38
+ void *buffer;
39
+ size_t length = get_header_size(input);
40
+ BlobHeader *header = NULL;
41
+
42
+ if(length < 1 || length > MAX_BLOB_HEADER_SIZE)
43
+ {
44
+ if(feof(input))
45
+ return NULL;
46
+ else
47
+ rb_raise(rb_eIOError, "Invalid blob header size");
48
+ }
49
+
50
+ if(!(buffer = malloc(length)))
51
+ rb_raise(rb_eNoMemError, "Unable to allocate memory for the blob header");
52
+
53
+ if(!fread(buffer, length, 1, input))
54
+ {
55
+ free(buffer);
56
+ rb_raise(rb_eIOError, "Unable to read the blob header");
57
+ }
58
+
59
+ header = blob_header__unpack(NULL, length, buffer);
60
+
61
+ free(buffer);
62
+
63
+ if(header == NULL)
64
+ rb_raise(rb_eIOError, "Unable to unpack the blob header");
65
+
66
+ return header;
67
+ }
68
+
69
+ static void *read_blob(FILE *input, size_t length, size_t *raw_length)
70
+ {
71
+ VALUE exc = Qnil;
72
+ void *buffer = NULL;
73
+ Blob *blob = NULL;
74
+
75
+ if(length < 1 || length > MAX_BLOB_SIZE)
76
+ rb_raise(rb_eIOError, "Invalid blob size");
77
+
78
+ if(!(buffer = malloc(length)))
79
+ rb_raise(rb_eNoMemError, "Unable to allocate memory for the blob");
80
+
81
+ if(fread(buffer, length, 1, input))
82
+ blob = blob__unpack(NULL, length, buffer);
83
+
84
+ free(buffer);
85
+
86
+ if(blob == NULL)
87
+ rb_raise(rb_eIOError, "Unable to read the blob");
88
+
89
+ void *data = NULL;
90
+
91
+ if(blob->has_raw)
92
+ {
93
+ if(!(data = malloc(blob->raw.len)))
94
+ {
95
+ exc = rb_exc_new2(rb_eNoMemError, "Unable to allocate memory for the data");
96
+ goto exit_nicely;
97
+ }
98
+
99
+ memcpy(data, blob->raw.data, blob->raw.len);
100
+ *raw_length = blob->raw.len;
101
+ }
102
+ else if(blob->has_zlib_data)
103
+ {
104
+ if(!(data = malloc(MAX_BLOB_SIZE)))
105
+ {
106
+ exc = rb_exc_new2(rb_eNoMemError, "Unable to allocate memory for the data");
107
+ goto exit_nicely;
108
+ }
109
+
110
+ int ret;
111
+ z_stream strm;
112
+
113
+ strm.zalloc = Z_NULL;
114
+ strm.zfree = Z_NULL;
115
+ strm.opaque = Z_NULL;
116
+ strm.avail_in = (unsigned int)blob->zlib_data.len;
117
+ strm.next_in = blob->zlib_data.data;
118
+ strm.avail_out = blob->raw_size;
119
+ strm.next_out = data;
120
+
121
+ ret = inflateInit(&strm);
122
+
123
+ if (ret != Z_OK)
124
+ {
125
+ exc = rb_exc_new2(rb_eRuntimeError, "Zlib init failed");
126
+ goto exit_nicely;
127
+ }
128
+
129
+ ret = inflate(&strm, Z_NO_FLUSH);
130
+
131
+ (void)inflateEnd(&strm);
132
+
133
+ if (ret != Z_STREAM_END)
134
+ {
135
+ exc = rb_exc_new2(rb_eRuntimeError, "Zlib compression failed");
136
+ goto exit_nicely;
137
+ }
138
+
139
+ *raw_length = blob->raw_size;
140
+ }
141
+ else if(blob->has_lzma_data)
142
+ {
143
+ exc = rb_exc_new2(rb_eNotImpError, "LZMA compression is not supported");
144
+ goto exit_nicely;
145
+ }
146
+ else
147
+ {
148
+ exc = rb_exc_new2(rb_eNotImpError, "Unknown blob format");
149
+ goto exit_nicely;
150
+ }
151
+
152
+ exit_nicely:
153
+ if(blob) blob__free_unpacked(blob, NULL);
154
+ if(!data) free(data);
155
+ if(exc != Qnil) rb_exc_raise(exc);
156
+
157
+ return data;
158
+ }
159
+
160
+ static VALUE init_data_arr()
161
+ {
162
+ VALUE data = rb_hash_new();
163
+
164
+ rb_hash_aset(data, STR2SYM("nodes"), rb_ary_new());
165
+ rb_hash_aset(data, STR2SYM("ways"), rb_ary_new());
166
+ rb_hash_aset(data, STR2SYM("relations"), rb_ary_new());
167
+
168
+ return data;
169
+ }
170
+
171
+ static void add_info(VALUE hash, Info *info, StringTable *string_table, double ts_granularity)
172
+ {
173
+ VALUE version, timestamp, changeset, uid, user;
174
+
175
+ version = info->version ? INT2NUM(info->version) : Qnil;
176
+ timestamp = info->timestamp ? LL2NUM(info->timestamp * ts_granularity) : Qnil;
177
+ changeset = info->changeset ? LL2NUM(info->changeset) : Qnil;
178
+ uid = info->uid ? INT2NUM(info->uid) : Qnil;
179
+ user = info->user_sid ? str_new(parse_binary_str(string_table->s[info->user_sid])) : Qnil;
180
+
181
+ rb_hash_aset(hash, STR2SYM("version"), version);
182
+ rb_hash_aset(hash, STR2SYM("timestamp"), timestamp);
183
+ rb_hash_aset(hash, STR2SYM("changeset"), changeset);
184
+ rb_hash_aset(hash, STR2SYM("uid"), uid);
185
+ rb_hash_aset(hash, STR2SYM("user"), user);
186
+ }
187
+
188
+ static int parse_osm_header(VALUE obj, FILE *input)
189
+ {
190
+ BlobHeader *header = read_blob_header(input);
191
+
192
+ // EOF reached
193
+ if(header == NULL)
194
+ rb_raise(rb_eEOFError, "EOF reached without finding data");
195
+
196
+ if(strcmp("OSMHeader", header->type) != 0)
197
+ rb_raise(rb_eIOError, "OSMHeader not found, probably the file is corrupt or invalid");
198
+
199
+ void *blob = NULL;
200
+ size_t blob_length = 0, datasize = header->datasize;
201
+ HeaderBlock *header_block = NULL;
202
+
203
+ blob_header__free_unpacked(header, NULL);
204
+
205
+ blob = read_blob(input, datasize, &blob_length);
206
+ header_block = header_block__unpack(NULL, blob_length, blob);
207
+
208
+ if(header_block == NULL)
209
+ rb_raise(rb_eIOError, "Unable to unpack the HeaderBlock");
210
+
211
+ VALUE header_hash = rb_hash_new();
212
+ VALUE bbox_hash = rb_hash_new();
213
+
214
+ VALUE required_features = Qnil;
215
+ VALUE optional_features = Qnil;
216
+ VALUE writingprogram = Qnil;
217
+ VALUE source = Qnil;
218
+
219
+ VALUE osmosis_replication_timestamp = Qnil;
220
+ VALUE osmosis_replication_sequence_number = Qnil;
221
+ VALUE osmosis_replication_base_url = Qnil;
222
+
223
+ if(header_block->n_required_features > 0)
224
+ {
225
+ required_features = rb_ary_new();
226
+
227
+ for(int i = 0; i < (int)header_block->n_required_features; i++)
228
+ rb_ary_push(required_features, str_new(header_block->required_features[i]));
229
+ }
230
+
231
+ if(header_block->n_optional_features > 0)
232
+ {
233
+ optional_features = rb_ary_new();
234
+
235
+ for(int i = 0; i < (int)header_block->n_optional_features; i++)
236
+ rb_ary_push(optional_features, str_new(header_block->optional_features[i]));
237
+ }
238
+
239
+ if(header_block->writingprogram)
240
+ writingprogram = str_new(header_block->writingprogram);
241
+
242
+ if(header_block->source)
243
+ source = str_new(header_block->source);
244
+
245
+ if(header_block->bbox)
246
+ {
247
+ rb_hash_aset(bbox_hash, STR2SYM("top"), rb_float_new(header_block->bbox->top * NANO_DEGREE));
248
+ rb_hash_aset(bbox_hash, STR2SYM("right"), rb_float_new(header_block->bbox->right * NANO_DEGREE));
249
+ rb_hash_aset(bbox_hash, STR2SYM("bottom"), rb_float_new(header_block->bbox->bottom * NANO_DEGREE));
250
+ rb_hash_aset(bbox_hash, STR2SYM("left"), rb_float_new(header_block->bbox->left * NANO_DEGREE));
251
+ }
252
+
253
+ if(header_block->has_osmosis_replication_timestamp)
254
+ osmosis_replication_timestamp = ULL2NUM(header_block->osmosis_replication_timestamp);
255
+
256
+ if(header_block->has_osmosis_replication_sequence_number)
257
+ osmosis_replication_sequence_number = ULL2NUM(header_block->osmosis_replication_sequence_number);
258
+
259
+ if(header_block->osmosis_replication_base_url)
260
+ osmosis_replication_base_url = str_new(header_block->osmosis_replication_base_url);
261
+
262
+ rb_hash_aset(header_hash, str_new("bbox"), bbox_hash);
263
+ rb_hash_aset(header_hash, str_new("required_features"), required_features);
264
+ rb_hash_aset(header_hash, str_new("optional_features"), optional_features);
265
+ rb_hash_aset(header_hash, str_new("writing_program"), writingprogram);
266
+ rb_hash_aset(header_hash, str_new("source"), source);
267
+ rb_hash_aset(header_hash, str_new("osmosis_replication_timestamp"), osmosis_replication_timestamp);
268
+ rb_hash_aset(header_hash, str_new("osmosis_replication_sequence_number"), osmosis_replication_sequence_number);
269
+ rb_hash_aset(header_hash, str_new("osmosis_replication_base_url"), osmosis_replication_base_url);
270
+
271
+ rb_iv_set(obj, "@header", header_hash);
272
+
273
+ header_block__free_unpacked(header_block, NULL);
274
+
275
+ return 1;
276
+ }
277
+
278
+ static void process_nodes(VALUE out, PrimitiveGroup *group, StringTable *string_table, int64_t lat_offset, int64_t lon_offset, int64_t granularity, int32_t ts_granularity)
279
+ {
280
+ double lat = 0;
281
+ double lon = 0;
282
+ unsigned j;
283
+
284
+ for(size_t i = 0; i < group->n_nodes; i++)
285
+ {
286
+ Node *node = group->nodes[i];
287
+ VALUE node_out = rb_hash_new();
288
+
289
+ lat = NANO_DEGREE * (lat_offset + (node->lat * granularity));
290
+ lon = NANO_DEGREE * (lon_offset + (node->lon * granularity));
291
+
292
+ rb_hash_aset(node_out, STR2SYM("id"), LL2NUM(node->id));
293
+ rb_hash_aset(node_out, STR2SYM("lat"), FIX8(rb_float_new(lat)));
294
+ rb_hash_aset(node_out, STR2SYM("lon"), FIX8(rb_float_new(lon)));
295
+
296
+ if(node->info)
297
+ add_info(node_out, node->info, string_table, ts_granularity);
298
+
299
+ VALUE tags = rb_hash_new();
300
+
301
+ for(j = 0; j < node->n_keys; j++)
302
+ {
303
+ char *key = parse_binary_str(string_table->s[node->keys[j]]);
304
+ char *value = parse_binary_str(string_table->s[node->vals[j]]);
305
+
306
+ rb_hash_aset(tags, str_new(key), str_new(value));
307
+ }
308
+
309
+ rb_hash_aset(node_out, STR2SYM("tags"), tags);
310
+ rb_ary_push(out, node_out);
311
+ }
312
+ }
313
+
314
+ static void process_dense_nodes(VALUE out, DenseNodes *dense_nodes, StringTable *string_table, int64_t lat_offset, int64_t lon_offset, int64_t granularity, int32_t ts_granularity)
315
+ {
316
+ uint64_t node_id = 0;
317
+ int64_t delta_lat = 0;
318
+ int64_t delta_lon = 0;
319
+ int64_t delta_timestamp = 0;
320
+ int64_t delta_changeset = 0;
321
+ int32_t delta_user_sid = 0;
322
+ int32_t delta_uid = 0;
323
+
324
+ double lat = 0;
325
+ double lon = 0;
326
+
327
+ unsigned j = 0;
328
+
329
+ for(size_t i = 0; i < dense_nodes->n_id; i++)
330
+ {
331
+ VALUE node = rb_hash_new();
332
+
333
+ node_id += dense_nodes->id[i];
334
+ delta_lat += dense_nodes->lat[i];
335
+ delta_lon += dense_nodes->lon[i];
336
+
337
+ lat = NANO_DEGREE * (lat_offset + (delta_lat * granularity));
338
+ lon = NANO_DEGREE * (lon_offset + (delta_lon * granularity));
339
+
340
+ rb_hash_aset(node, STR2SYM("id"), LL2NUM(node_id));
341
+ rb_hash_aset(node, STR2SYM("lat"), FIX8(rb_float_new(lat)));
342
+ rb_hash_aset(node, STR2SYM("lon"), FIX8(rb_float_new(lon)));
343
+
344
+ // Extract info
345
+ if(dense_nodes->denseinfo)
346
+ {
347
+ delta_timestamp += dense_nodes->denseinfo->timestamp[i];
348
+ delta_changeset += dense_nodes->denseinfo->changeset[i];
349
+ delta_user_sid += dense_nodes->denseinfo->user_sid[i];
350
+ delta_uid += dense_nodes->denseinfo->uid[i];
351
+
352
+ Info info = {
353
+ .version = dense_nodes->denseinfo->version[i],
354
+ .timestamp = delta_timestamp,
355
+ .changeset = delta_changeset,
356
+ .user_sid = delta_user_sid,
357
+ .uid = delta_uid
358
+ };
359
+
360
+ add_info(node, &info, string_table, ts_granularity);
361
+ }
362
+
363
+ // Extract tags
364
+ VALUE tags = rb_hash_new();
365
+
366
+ if(j < dense_nodes->n_keys_vals)
367
+ {
368
+ while((dense_nodes->keys_vals[j] != 0) && (j < dense_nodes->n_keys_vals))
369
+ {
370
+ char *key = parse_binary_str(string_table->s[dense_nodes->keys_vals[j]]);
371
+ char *value = parse_binary_str(string_table->s[dense_nodes->keys_vals[j+1]]);
372
+
373
+ rb_hash_aset(tags, str_new(key), str_new(value));
374
+
375
+ j += 2;
376
+ }
377
+ j += 1;
378
+ }
379
+
380
+ rb_hash_aset(node, STR2SYM("tags"), tags);
381
+ rb_ary_push(out, node);
382
+ }
383
+ }
384
+
385
+ static void process_ways(VALUE out, PrimitiveGroup *group, StringTable *string_table, int32_t ts_granularity)
386
+ {
387
+ unsigned j, k;
388
+ int64_t delta_refs = 0;
389
+
390
+ for(size_t i = 0; i < group->n_ways; i++)
391
+ {
392
+ Way *way = group->ways[i];
393
+
394
+ VALUE way_out = rb_hash_new();
395
+
396
+ rb_hash_aset(way_out, STR2SYM("id"), LL2NUM(way->id));
397
+
398
+ // Extract tags
399
+ VALUE tags = rb_hash_new();
400
+
401
+ for(j = 0; j < way->n_keys; j++)
402
+ {
403
+ char *key = parse_binary_str(string_table->s[way->keys[j]]);
404
+ char *value = parse_binary_str(string_table->s[way->vals[j]]);
405
+
406
+ rb_hash_aset(tags, str_new(key), str_new(value));
407
+ }
408
+
409
+ // Extract refs
410
+ VALUE refs = rb_ary_new();
411
+
412
+ for(k = 0; k < way->n_refs; k++)
413
+ {
414
+ delta_refs += way->refs[k];
415
+ rb_ary_push(refs, LL2NUM(delta_refs));
416
+ }
417
+
418
+ // Extract info
419
+ if(way->info)
420
+ add_info(way_out, way->info, string_table, ts_granularity);
421
+
422
+ rb_hash_aset(way_out, STR2SYM("tags"), tags);
423
+ rb_hash_aset(way_out, STR2SYM("refs"), refs);
424
+ rb_ary_push(out, way_out);
425
+ }
426
+ }
427
+
428
+ static void process_relations(VALUE out, PrimitiveGroup *group, StringTable *string_table, int32_t ts_granularity)
429
+ {
430
+ unsigned j, k;
431
+
432
+ for(size_t i = 0; i < group->n_relations; i++)
433
+ {
434
+ Relation *relation = group->relations[i];
435
+ VALUE relation_out = rb_hash_new();
436
+
437
+ rb_hash_aset(relation_out, STR2SYM("id"), LL2NUM(relation->id));
438
+
439
+ // Extract tags
440
+ VALUE tags = rb_hash_new();
441
+
442
+ for(j = 0; j < relation->n_keys; j++)
443
+ {
444
+ char *key = parse_binary_str(string_table->s[relation->keys[j]]);
445
+ char *value = parse_binary_str(string_table->s[relation->vals[j]]);
446
+
447
+ rb_hash_aset(tags, str_new(key), str_new(value));
448
+ }
449
+
450
+ // Extract members
451
+ VALUE members = rb_hash_new();
452
+ VALUE nodes = rb_ary_new();
453
+ VALUE ways = rb_ary_new();
454
+ VALUE relations = rb_ary_new();
455
+
456
+ int64_t delta_memids = 0;
457
+ char *role;
458
+
459
+ for(k = 0; k < relation->n_memids; k++)
460
+ {
461
+ VALUE member = rb_hash_new();
462
+
463
+ delta_memids += relation->memids[k];
464
+
465
+ rb_hash_aset(member, STR2SYM("id"), LL2NUM(delta_memids));
466
+
467
+ if(role = parse_binary_str(string_table->s[relation->roles_sid[k]]))
468
+ rb_hash_aset(member, STR2SYM("role"), str_new(role));
469
+
470
+ switch(relation->types[k])
471
+ {
472
+ case RELATION__MEMBER_TYPE__NODE:
473
+ rb_ary_push(nodes, member);
474
+ break;
475
+ case RELATION__MEMBER_TYPE__WAY:
476
+ rb_ary_push(ways, member);
477
+ break;
478
+ case RELATION__MEMBER_TYPE__RELATION:
479
+ rb_ary_push(relations, member);
480
+ break;
481
+ }
482
+ }
483
+
484
+ rb_hash_aset(members, STR2SYM("nodes"), nodes);
485
+ rb_hash_aset(members, STR2SYM("ways"), ways);
486
+ rb_hash_aset(members, STR2SYM("relations"), relations);
487
+
488
+ // Extract info
489
+ if(relation->info)
490
+ add_info(relation_out, relation->info, string_table, ts_granularity);
491
+
492
+ rb_hash_aset(relation_out, STR2SYM("tags"), tags);
493
+ rb_hash_aset(relation_out, STR2SYM("members"), members);
494
+ rb_ary_push(out, relation_out);
495
+ }
496
+ }
497
+
498
+ static VALUE parse_osm_data(VALUE obj)
499
+ {
500
+ FILE *input = DATA_PTR(obj);
501
+ BlobHeader *header = read_blob_header(input);
502
+
503
+ if(header == NULL)
504
+ return Qfalse;
505
+
506
+ if(strcmp("OSMData", header->type) != 0)
507
+ rb_raise(rb_eIOError, "OSMData not found");
508
+
509
+ void *blob = NULL;
510
+ size_t blob_length = 0, datasize = header->datasize;
511
+ PrimitiveBlock *primitive_block = NULL;
512
+
513
+ blob_header__free_unpacked(header, NULL);
514
+
515
+ blob = read_blob(input, datasize, &blob_length);
516
+ primitive_block = primitive_block__unpack(NULL, blob_length, blob);
517
+
518
+ free(blob);
519
+
520
+ if(primitive_block == NULL)
521
+ rb_raise(rb_eIOError, "Unable to unpack the PrimitiveBlock");
522
+
523
+ int64_t lat_offset, lon_offset, granularity;
524
+ int32_t ts_granularity;
525
+
526
+ lat_offset = primitive_block->lat_offset;
527
+ lon_offset = primitive_block->lon_offset;
528
+ granularity = primitive_block->granularity;
529
+ ts_granularity = primitive_block->date_granularity;
530
+
531
+ StringTable *string_table = primitive_block->stringtable;
532
+
533
+ VALUE data = init_data_arr();
534
+ VALUE nodes = rb_hash_aref(data, STR2SYM("nodes"));
535
+ VALUE ways = rb_hash_aref(data, STR2SYM("ways"));
536
+ VALUE relations = rb_hash_aref(data, STR2SYM("relations"));
537
+
538
+ for(size_t i = 0; i < primitive_block->n_primitivegroup; i++)
539
+ {
540
+ PrimitiveGroup *primitive_group = primitive_block->primitivegroup[i];
541
+
542
+ if(primitive_group->nodes)
543
+ process_nodes(nodes, primitive_group, string_table, lat_offset, lon_offset, granularity, ts_granularity);
544
+
545
+ if(primitive_group->dense)
546
+ process_dense_nodes(nodes, primitive_group->dense, string_table, lat_offset, lon_offset, granularity, ts_granularity);
547
+
548
+ if(primitive_group->ways)
549
+ process_ways(ways, primitive_group, string_table, ts_granularity);
550
+
551
+ if(primitive_group->relations)
552
+ process_relations(relations, primitive_group, string_table, ts_granularity);
553
+ }
554
+
555
+ rb_iv_set(obj, "@data", data);
556
+
557
+ primitive_block__free_unpacked(primitive_block, NULL);
558
+
559
+ return Qtrue;
560
+ }
561
+
562
+ static VALUE header_getter(VALUE obj)
563
+ {
564
+ return rb_iv_get(obj, "@header");
565
+ }
566
+
567
+ static VALUE data_getter(VALUE obj)
568
+ {
569
+ return rb_iv_get(obj, "@data");
570
+ }
571
+
572
+ static VALUE nodes_getter(VALUE obj)
573
+ {
574
+ VALUE data = rb_iv_get(obj, "@data");
575
+
576
+ return rb_hash_aref(data, STR2SYM("nodes"));
577
+ }
578
+
579
+ static VALUE ways_getter(VALUE obj)
580
+ {
581
+ VALUE data = rb_iv_get(obj, "@data");
582
+
583
+ return rb_hash_aref(data, STR2SYM("ways"));
584
+ }
585
+
586
+ static VALUE relations_getter(VALUE obj)
587
+ {
588
+ VALUE data = rb_iv_get(obj, "@data");
589
+
590
+ return rb_hash_aref(data, STR2SYM("relations"));
591
+ }
592
+
593
+ static VALUE iterate(VALUE obj)
594
+ {
595
+ if (!rb_block_given_p())
596
+ rb_raise(rb_eArgError, "A block is expected");
597
+
598
+ while(parse_osm_data(obj) != Qfalse)
599
+ {
600
+ VALUE nodes = nodes_getter(obj);
601
+ VALUE ways = ways_getter(obj);
602
+ VALUE relations = relations_getter(obj);
603
+
604
+ rb_yield_values(3, nodes, ways, relations);
605
+ }
606
+
607
+ return Qnil;
608
+ }
609
+
610
+ static VALUE initialize(VALUE obj, VALUE filename)
611
+ {
612
+ // Check that filename is a string
613
+ Check_Type(filename, T_STRING);
614
+
615
+ // Check if the file has a valid extension
616
+ if(!strcmp(".osm.pbf", StringValuePtr(filename) + RSTRING_LEN(filename)-8) == 0)
617
+ rb_raise(rb_eArgError, "Not a osm.pbf file");
618
+
619
+ // Try to open the given file
620
+ if(!(DATA_PTR(obj) = fopen(StringValuePtr(filename), "rb")))
621
+ rb_raise(rb_eIOError, "Unable to open the file");
622
+
623
+ // Store the filename
624
+ rb_iv_set(obj, "@filename", filename);
625
+
626
+ // Every osm.pbf file must have an OSMHeader at the beginning.
627
+ // Failing to find it means that the file is corrupt or invalid.
628
+ parse_osm_header(obj, DATA_PTR(obj));
629
+
630
+ // Parse the firts OSMData fileblock
631
+ parse_osm_data(obj);
632
+
633
+ return obj;
634
+ }
635
+
636
+ static VALUE alloc_file(VALUE klass)
637
+ {
638
+ FILE *input = NULL;
639
+
640
+ return Data_Wrap_Struct(klass, NULL, fclose, input);
641
+ }
642
+
643
+ static VALUE inspect(VALUE obj)
644
+ {
645
+ const char *cname = rb_obj_classname(obj);
646
+ return rb_sprintf("#<%s:%p>", cname, (void*)obj);
647
+ }
648
+
649
+ void Init_pbf_parser(void)
650
+ {
651
+ VALUE klass = rb_define_class("PbfParser", rb_cObject);
652
+
653
+ rb_define_alloc_func(klass, alloc_file);
654
+ rb_define_method(klass, "initialize", initialize, 1);
655
+ rb_define_method(klass, "inspect", inspect, 0);
656
+ rb_define_method(klass, "next", parse_osm_data, 0);
657
+ rb_define_method(klass, "each", iterate, 0);
658
+
659
+ // Getters
660
+ rb_define_method(klass, "header", header_getter, 0);
661
+ rb_define_method(klass, "data", data_getter, 0);
662
+ rb_define_method(klass, "nodes", nodes_getter, 0);
663
+ rb_define_method(klass, "ways", ways_getter, 0);
664
+ rb_define_method(klass, "relations", relations_getter, 0);
665
+ }
@@ -0,0 +1,32 @@
1
+ #ifndef PBF_PARSER_H
2
+ #define PBF_PARSER_H
3
+
4
+ #include <stdio.h>
5
+ #include <stdlib.h>
6
+ #include <ruby.h>
7
+
8
+ #ifdef HAVE_RUBY_ENCODING_H
9
+ #include <ruby/encoding.h>
10
+ #endif
11
+
12
+ #include "zlib.h"
13
+
14
+ #include "fileformat.pb-c.h"
15
+ #include "osmformat.pb-c.h"
16
+
17
+ #define MAX_BLOB_HEADER_SIZE 64 * 1024
18
+ #define MAX_BLOB_SIZE 32 * 1024 * 1024
19
+
20
+ #define NANO_DEGREE .000000001
21
+
22
+ #define STR2SYM(str) ID2SYM(rb_intern(str))
23
+
24
+ /*
25
+ Deal with floating point accuracy problems while calculating lat and lon.
26
+ 8 decimal places gives a precision of ~1mm and AFAIK that's the maximum
27
+ precision used with OSM data (and most geo tools). */
28
+ #define FIX8(num) rb_funcall(num, rb_intern("round"), 1, INT2NUM(8))
29
+
30
+ void Init_pbf_parser(void);
31
+
32
+ #endif
data/lib/pbf_parser.rb ADDED
@@ -0,0 +1 @@
1
# Load the compiled extension that defines the parser class.
require "pbf_parser/pbf_parser"