pbf_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,665 @@
1
+ #include "pbf_parser.h"
2
+
3
+ /*
4
+ Set string encoding to UTF8
5
+ See http://tenderlovemaking.com/2009/06/26/string-encoding-in-ruby-1-9-c-extensions.html
6
+ */
7
+ static VALUE str_new(const char *str) {
8
+ VALUE string = rb_str_new2(str);
9
+
10
+ #ifdef HAVE_RUBY_ENCODING_H
11
+ int enc = rb_enc_find_index("UTF-8");
12
+ if(enc != -1) rb_enc_associate_index(string, enc);
13
+ #endif
14
+
15
+ return string;
16
+ }
17
+
18
/*
  Read the 4-byte, big-endian (network byte order) length prefix that
  precedes a BlobHeader.

  Returns the decoded length, or 0 when 4 bytes could not be read
  (end of file or read error).
*/
static size_t get_header_size(FILE *input)
{
  unsigned char buffer[4];

  if(fread(buffer, sizeof(buffer), 1, input) != 1)
    return 0;

  /* Decode byte by byte. Casting the 4-byte buffer to a wider or more
     strictly aligned integer type would read past its end and break the
     aliasing rules; this form is also independent of host endianness. */
  return ((size_t)buffer[0] << 24) |
         ((size_t)buffer[1] << 16) |
         ((size_t)buffer[2] << 8)  |
          (size_t)buffer[3];
}
27
+
28
+ static char *parse_binary_str(ProtobufCBinaryData bstr)
29
+ {
30
+ char *str = calloc(bstr.len + 1, 1);
31
+ memcpy(str, bstr.data, bstr.len);
32
+
33
+ return str;
34
+ }
35
+
36
+ static BlobHeader *read_blob_header(FILE *input)
37
+ {
38
+ void *buffer;
39
+ size_t length = get_header_size(input);
40
+ BlobHeader *header = NULL;
41
+
42
+ if(length < 1 || length > MAX_BLOB_HEADER_SIZE)
43
+ {
44
+ if(feof(input))
45
+ return NULL;
46
+ else
47
+ rb_raise(rb_eIOError, "Invalid blob header size");
48
+ }
49
+
50
+ if(!(buffer = malloc(length)))
51
+ rb_raise(rb_eNoMemError, "Unable to allocate memory for the blob header");
52
+
53
+ if(!fread(buffer, length, 1, input))
54
+ {
55
+ free(buffer);
56
+ rb_raise(rb_eIOError, "Unable to read the blob header");
57
+ }
58
+
59
+ header = blob_header__unpack(NULL, length, buffer);
60
+
61
+ free(buffer);
62
+
63
+ if(header == NULL)
64
+ rb_raise(rb_eIOError, "Unable to unpack the blob header");
65
+
66
+ return header;
67
+ }
68
+
69
+ static void *read_blob(FILE *input, size_t length, size_t *raw_length)
70
+ {
71
+ VALUE exc = Qnil;
72
+ void *buffer = NULL;
73
+ Blob *blob = NULL;
74
+
75
+ if(length < 1 || length > MAX_BLOB_SIZE)
76
+ rb_raise(rb_eIOError, "Invalid blob size");
77
+
78
+ if(!(buffer = malloc(length)))
79
+ rb_raise(rb_eNoMemError, "Unable to allocate memory for the blob");
80
+
81
+ if(fread(buffer, length, 1, input))
82
+ blob = blob__unpack(NULL, length, buffer);
83
+
84
+ free(buffer);
85
+
86
+ if(blob == NULL)
87
+ rb_raise(rb_eIOError, "Unable to read the blob");
88
+
89
+ void *data = NULL;
90
+
91
+ if(blob->has_raw)
92
+ {
93
+ if(!(data = malloc(blob->raw.len)))
94
+ {
95
+ exc = rb_exc_new2(rb_eNoMemError, "Unable to allocate memory for the data");
96
+ goto exit_nicely;
97
+ }
98
+
99
+ memcpy(data, blob->raw.data, blob->raw.len);
100
+ *raw_length = blob->raw.len;
101
+ }
102
+ else if(blob->has_zlib_data)
103
+ {
104
+ if(!(data = malloc(MAX_BLOB_SIZE)))
105
+ {
106
+ exc = rb_exc_new2(rb_eNoMemError, "Unable to allocate memory for the data");
107
+ goto exit_nicely;
108
+ }
109
+
110
+ int ret;
111
+ z_stream strm;
112
+
113
+ strm.zalloc = Z_NULL;
114
+ strm.zfree = Z_NULL;
115
+ strm.opaque = Z_NULL;
116
+ strm.avail_in = (unsigned int)blob->zlib_data.len;
117
+ strm.next_in = blob->zlib_data.data;
118
+ strm.avail_out = blob->raw_size;
119
+ strm.next_out = data;
120
+
121
+ ret = inflateInit(&strm);
122
+
123
+ if (ret != Z_OK)
124
+ {
125
+ exc = rb_exc_new2(rb_eRuntimeError, "Zlib init failed");
126
+ goto exit_nicely;
127
+ }
128
+
129
+ ret = inflate(&strm, Z_NO_FLUSH);
130
+
131
+ (void)inflateEnd(&strm);
132
+
133
+ if (ret != Z_STREAM_END)
134
+ {
135
+ exc = rb_exc_new2(rb_eRuntimeError, "Zlib compression failed");
136
+ goto exit_nicely;
137
+ }
138
+
139
+ *raw_length = blob->raw_size;
140
+ }
141
+ else if(blob->has_lzma_data)
142
+ {
143
+ exc = rb_exc_new2(rb_eNotImpError, "LZMA compression is not supported");
144
+ goto exit_nicely;
145
+ }
146
+ else
147
+ {
148
+ exc = rb_exc_new2(rb_eNotImpError, "Unknown blob format");
149
+ goto exit_nicely;
150
+ }
151
+
152
+ exit_nicely:
153
+ if(blob) blob__free_unpacked(blob, NULL);
154
+ if(!data) free(data);
155
+ if(exc != Qnil) rb_exc_raise(exc);
156
+
157
+ return data;
158
+ }
159
+
160
+ static VALUE init_data_arr()
161
+ {
162
+ VALUE data = rb_hash_new();
163
+
164
+ rb_hash_aset(data, STR2SYM("nodes"), rb_ary_new());
165
+ rb_hash_aset(data, STR2SYM("ways"), rb_ary_new());
166
+ rb_hash_aset(data, STR2SYM("relations"), rb_ary_new());
167
+
168
+ return data;
169
+ }
170
+
171
+ static void add_info(VALUE hash, Info *info, StringTable *string_table, double ts_granularity)
172
+ {
173
+ VALUE version, timestamp, changeset, uid, user;
174
+
175
+ version = info->version ? INT2NUM(info->version) : Qnil;
176
+ timestamp = info->timestamp ? LL2NUM(info->timestamp * ts_granularity) : Qnil;
177
+ changeset = info->changeset ? LL2NUM(info->changeset) : Qnil;
178
+ uid = info->uid ? INT2NUM(info->uid) : Qnil;
179
+ user = info->user_sid ? str_new(parse_binary_str(string_table->s[info->user_sid])) : Qnil;
180
+
181
+ rb_hash_aset(hash, STR2SYM("version"), version);
182
+ rb_hash_aset(hash, STR2SYM("timestamp"), timestamp);
183
+ rb_hash_aset(hash, STR2SYM("changeset"), changeset);
184
+ rb_hash_aset(hash, STR2SYM("uid"), uid);
185
+ rb_hash_aset(hash, STR2SYM("user"), user);
186
+ }
187
+
188
+ static int parse_osm_header(VALUE obj, FILE *input)
189
+ {
190
+ BlobHeader *header = read_blob_header(input);
191
+
192
+ // EOF reached
193
+ if(header == NULL)
194
+ rb_raise(rb_eEOFError, "EOF reached without finding data");
195
+
196
+ if(strcmp("OSMHeader", header->type) != 0)
197
+ rb_raise(rb_eIOError, "OSMHeader not found, probably the file is corrupt or invalid");
198
+
199
+ void *blob = NULL;
200
+ size_t blob_length = 0, datasize = header->datasize;
201
+ HeaderBlock *header_block = NULL;
202
+
203
+ blob_header__free_unpacked(header, NULL);
204
+
205
+ blob = read_blob(input, datasize, &blob_length);
206
+ header_block = header_block__unpack(NULL, blob_length, blob);
207
+
208
+ if(header_block == NULL)
209
+ rb_raise(rb_eIOError, "Unable to unpack the HeaderBlock");
210
+
211
+ VALUE header_hash = rb_hash_new();
212
+ VALUE bbox_hash = rb_hash_new();
213
+
214
+ VALUE required_features = Qnil;
215
+ VALUE optional_features = Qnil;
216
+ VALUE writingprogram = Qnil;
217
+ VALUE source = Qnil;
218
+
219
+ VALUE osmosis_replication_timestamp = Qnil;
220
+ VALUE osmosis_replication_sequence_number = Qnil;
221
+ VALUE osmosis_replication_base_url = Qnil;
222
+
223
+ if(header_block->n_required_features > 0)
224
+ {
225
+ required_features = rb_ary_new();
226
+
227
+ for(int i = 0; i < (int)header_block->n_required_features; i++)
228
+ rb_ary_push(required_features, str_new(header_block->required_features[i]));
229
+ }
230
+
231
+ if(header_block->n_optional_features > 0)
232
+ {
233
+ optional_features = rb_ary_new();
234
+
235
+ for(int i = 0; i < (int)header_block->n_optional_features; i++)
236
+ rb_ary_push(optional_features, str_new(header_block->optional_features[i]));
237
+ }
238
+
239
+ if(header_block->writingprogram)
240
+ writingprogram = str_new(header_block->writingprogram);
241
+
242
+ if(header_block->source)
243
+ source = str_new(header_block->source);
244
+
245
+ if(header_block->bbox)
246
+ {
247
+ rb_hash_aset(bbox_hash, STR2SYM("top"), rb_float_new(header_block->bbox->top * NANO_DEGREE));
248
+ rb_hash_aset(bbox_hash, STR2SYM("right"), rb_float_new(header_block->bbox->right * NANO_DEGREE));
249
+ rb_hash_aset(bbox_hash, STR2SYM("bottom"), rb_float_new(header_block->bbox->bottom * NANO_DEGREE));
250
+ rb_hash_aset(bbox_hash, STR2SYM("left"), rb_float_new(header_block->bbox->left * NANO_DEGREE));
251
+ }
252
+
253
+ if(header_block->has_osmosis_replication_timestamp)
254
+ osmosis_replication_timestamp = ULL2NUM(header_block->osmosis_replication_timestamp);
255
+
256
+ if(header_block->has_osmosis_replication_sequence_number)
257
+ osmosis_replication_sequence_number = ULL2NUM(header_block->osmosis_replication_sequence_number);
258
+
259
+ if(header_block->osmosis_replication_base_url)
260
+ osmosis_replication_base_url = str_new(header_block->osmosis_replication_base_url);
261
+
262
+ rb_hash_aset(header_hash, str_new("bbox"), bbox_hash);
263
+ rb_hash_aset(header_hash, str_new("required_features"), required_features);
264
+ rb_hash_aset(header_hash, str_new("optional_features"), optional_features);
265
+ rb_hash_aset(header_hash, str_new("writing_program"), writingprogram);
266
+ rb_hash_aset(header_hash, str_new("source"), source);
267
+ rb_hash_aset(header_hash, str_new("osmosis_replication_timestamp"), osmosis_replication_timestamp);
268
+ rb_hash_aset(header_hash, str_new("osmosis_replication_sequence_number"), osmosis_replication_sequence_number);
269
+ rb_hash_aset(header_hash, str_new("osmosis_replication_base_url"), osmosis_replication_base_url);
270
+
271
+ rb_iv_set(obj, "@header", header_hash);
272
+
273
+ header_block__free_unpacked(header_block, NULL);
274
+
275
+ return 1;
276
+ }
277
+
278
+ static void process_nodes(VALUE out, PrimitiveGroup *group, StringTable *string_table, int64_t lat_offset, int64_t lon_offset, int64_t granularity, int32_t ts_granularity)
279
+ {
280
+ double lat = 0;
281
+ double lon = 0;
282
+ unsigned j;
283
+
284
+ for(size_t i = 0; i < group->n_nodes; i++)
285
+ {
286
+ Node *node = group->nodes[i];
287
+ VALUE node_out = rb_hash_new();
288
+
289
+ lat = NANO_DEGREE * (lat_offset + (node->lat * granularity));
290
+ lon = NANO_DEGREE * (lon_offset + (node->lon * granularity));
291
+
292
+ rb_hash_aset(node_out, STR2SYM("id"), LL2NUM(node->id));
293
+ rb_hash_aset(node_out, STR2SYM("lat"), FIX8(rb_float_new(lat)));
294
+ rb_hash_aset(node_out, STR2SYM("lon"), FIX8(rb_float_new(lon)));
295
+
296
+ if(node->info)
297
+ add_info(node_out, node->info, string_table, ts_granularity);
298
+
299
+ VALUE tags = rb_hash_new();
300
+
301
+ for(j = 0; j < node->n_keys; j++)
302
+ {
303
+ char *key = parse_binary_str(string_table->s[node->keys[j]]);
304
+ char *value = parse_binary_str(string_table->s[node->vals[j]]);
305
+
306
+ rb_hash_aset(tags, str_new(key), str_new(value));
307
+ }
308
+
309
+ rb_hash_aset(node_out, STR2SYM("tags"), tags);
310
+ rb_ary_push(out, node_out);
311
+ }
312
+ }
313
+
314
+ static void process_dense_nodes(VALUE out, DenseNodes *dense_nodes, StringTable *string_table, int64_t lat_offset, int64_t lon_offset, int64_t granularity, int32_t ts_granularity)
315
+ {
316
+ uint64_t node_id = 0;
317
+ int64_t delta_lat = 0;
318
+ int64_t delta_lon = 0;
319
+ int64_t delta_timestamp = 0;
320
+ int64_t delta_changeset = 0;
321
+ int32_t delta_user_sid = 0;
322
+ int32_t delta_uid = 0;
323
+
324
+ double lat = 0;
325
+ double lon = 0;
326
+
327
+ unsigned j = 0;
328
+
329
+ for(size_t i = 0; i < dense_nodes->n_id; i++)
330
+ {
331
+ VALUE node = rb_hash_new();
332
+
333
+ node_id += dense_nodes->id[i];
334
+ delta_lat += dense_nodes->lat[i];
335
+ delta_lon += dense_nodes->lon[i];
336
+
337
+ lat = NANO_DEGREE * (lat_offset + (delta_lat * granularity));
338
+ lon = NANO_DEGREE * (lon_offset + (delta_lon * granularity));
339
+
340
+ rb_hash_aset(node, STR2SYM("id"), LL2NUM(node_id));
341
+ rb_hash_aset(node, STR2SYM("lat"), FIX8(rb_float_new(lat)));
342
+ rb_hash_aset(node, STR2SYM("lon"), FIX8(rb_float_new(lon)));
343
+
344
+ // Extract info
345
+ if(dense_nodes->denseinfo)
346
+ {
347
+ delta_timestamp += dense_nodes->denseinfo->timestamp[i];
348
+ delta_changeset += dense_nodes->denseinfo->changeset[i];
349
+ delta_user_sid += dense_nodes->denseinfo->user_sid[i];
350
+ delta_uid += dense_nodes->denseinfo->uid[i];
351
+
352
+ Info info = {
353
+ .version = dense_nodes->denseinfo->version[i],
354
+ .timestamp = delta_timestamp,
355
+ .changeset = delta_changeset,
356
+ .user_sid = delta_user_sid,
357
+ .uid = delta_uid
358
+ };
359
+
360
+ add_info(node, &info, string_table, ts_granularity);
361
+ }
362
+
363
+ // Extract tags
364
+ VALUE tags = rb_hash_new();
365
+
366
+ if(j < dense_nodes->n_keys_vals)
367
+ {
368
+ while((dense_nodes->keys_vals[j] != 0) && (j < dense_nodes->n_keys_vals))
369
+ {
370
+ char *key = parse_binary_str(string_table->s[dense_nodes->keys_vals[j]]);
371
+ char *value = parse_binary_str(string_table->s[dense_nodes->keys_vals[j+1]]);
372
+
373
+ rb_hash_aset(tags, str_new(key), str_new(value));
374
+
375
+ j += 2;
376
+ }
377
+ j += 1;
378
+ }
379
+
380
+ rb_hash_aset(node, STR2SYM("tags"), tags);
381
+ rb_ary_push(out, node);
382
+ }
383
+ }
384
+
385
+ static void process_ways(VALUE out, PrimitiveGroup *group, StringTable *string_table, int32_t ts_granularity)
386
+ {
387
+ unsigned j, k;
388
+ int64_t delta_refs = 0;
389
+
390
+ for(size_t i = 0; i < group->n_ways; i++)
391
+ {
392
+ Way *way = group->ways[i];
393
+
394
+ VALUE way_out = rb_hash_new();
395
+
396
+ rb_hash_aset(way_out, STR2SYM("id"), LL2NUM(way->id));
397
+
398
+ // Extract tags
399
+ VALUE tags = rb_hash_new();
400
+
401
+ for(j = 0; j < way->n_keys; j++)
402
+ {
403
+ char *key = parse_binary_str(string_table->s[way->keys[j]]);
404
+ char *value = parse_binary_str(string_table->s[way->vals[j]]);
405
+
406
+ rb_hash_aset(tags, str_new(key), str_new(value));
407
+ }
408
+
409
+ // Extract refs
410
+ VALUE refs = rb_ary_new();
411
+
412
+ for(k = 0; k < way->n_refs; k++)
413
+ {
414
+ delta_refs += way->refs[k];
415
+ rb_ary_push(refs, LL2NUM(delta_refs));
416
+ }
417
+
418
+ // Extract info
419
+ if(way->info)
420
+ add_info(way_out, way->info, string_table, ts_granularity);
421
+
422
+ rb_hash_aset(way_out, STR2SYM("tags"), tags);
423
+ rb_hash_aset(way_out, STR2SYM("refs"), refs);
424
+ rb_ary_push(out, way_out);
425
+ }
426
+ }
427
+
428
+ static void process_relations(VALUE out, PrimitiveGroup *group, StringTable *string_table, int32_t ts_granularity)
429
+ {
430
+ unsigned j, k;
431
+
432
+ for(size_t i = 0; i < group->n_relations; i++)
433
+ {
434
+ Relation *relation = group->relations[i];
435
+ VALUE relation_out = rb_hash_new();
436
+
437
+ rb_hash_aset(relation_out, STR2SYM("id"), LL2NUM(relation->id));
438
+
439
+ // Extract tags
440
+ VALUE tags = rb_hash_new();
441
+
442
+ for(j = 0; j < relation->n_keys; j++)
443
+ {
444
+ char *key = parse_binary_str(string_table->s[relation->keys[j]]);
445
+ char *value = parse_binary_str(string_table->s[relation->vals[j]]);
446
+
447
+ rb_hash_aset(tags, str_new(key), str_new(value));
448
+ }
449
+
450
+ // Extract members
451
+ VALUE members = rb_hash_new();
452
+ VALUE nodes = rb_ary_new();
453
+ VALUE ways = rb_ary_new();
454
+ VALUE relations = rb_ary_new();
455
+
456
+ int64_t delta_memids = 0;
457
+ char *role;
458
+
459
+ for(k = 0; k < relation->n_memids; k++)
460
+ {
461
+ VALUE member = rb_hash_new();
462
+
463
+ delta_memids += relation->memids[k];
464
+
465
+ rb_hash_aset(member, STR2SYM("id"), LL2NUM(delta_memids));
466
+
467
+ if(role = parse_binary_str(string_table->s[relation->roles_sid[k]]))
468
+ rb_hash_aset(member, STR2SYM("role"), str_new(role));
469
+
470
+ switch(relation->types[k])
471
+ {
472
+ case RELATION__MEMBER_TYPE__NODE:
473
+ rb_ary_push(nodes, member);
474
+ break;
475
+ case RELATION__MEMBER_TYPE__WAY:
476
+ rb_ary_push(ways, member);
477
+ break;
478
+ case RELATION__MEMBER_TYPE__RELATION:
479
+ rb_ary_push(relations, member);
480
+ break;
481
+ }
482
+ }
483
+
484
+ rb_hash_aset(members, STR2SYM("nodes"), nodes);
485
+ rb_hash_aset(members, STR2SYM("ways"), ways);
486
+ rb_hash_aset(members, STR2SYM("relations"), relations);
487
+
488
+ // Extract info
489
+ if(relation->info)
490
+ add_info(relation_out, relation->info, string_table, ts_granularity);
491
+
492
+ rb_hash_aset(relation_out, STR2SYM("tags"), tags);
493
+ rb_hash_aset(relation_out, STR2SYM("members"), members);
494
+ rb_ary_push(out, relation_out);
495
+ }
496
+ }
497
+
498
+ static VALUE parse_osm_data(VALUE obj)
499
+ {
500
+ FILE *input = DATA_PTR(obj);
501
+ BlobHeader *header = read_blob_header(input);
502
+
503
+ if(header == NULL)
504
+ return Qfalse;
505
+
506
+ if(strcmp("OSMData", header->type) != 0)
507
+ rb_raise(rb_eIOError, "OSMData not found");
508
+
509
+ void *blob = NULL;
510
+ size_t blob_length = 0, datasize = header->datasize;
511
+ PrimitiveBlock *primitive_block = NULL;
512
+
513
+ blob_header__free_unpacked(header, NULL);
514
+
515
+ blob = read_blob(input, datasize, &blob_length);
516
+ primitive_block = primitive_block__unpack(NULL, blob_length, blob);
517
+
518
+ free(blob);
519
+
520
+ if(primitive_block == NULL)
521
+ rb_raise(rb_eIOError, "Unable to unpack the PrimitiveBlock");
522
+
523
+ int64_t lat_offset, lon_offset, granularity;
524
+ int32_t ts_granularity;
525
+
526
+ lat_offset = primitive_block->lat_offset;
527
+ lon_offset = primitive_block->lon_offset;
528
+ granularity = primitive_block->granularity;
529
+ ts_granularity = primitive_block->date_granularity;
530
+
531
+ StringTable *string_table = primitive_block->stringtable;
532
+
533
+ VALUE data = init_data_arr();
534
+ VALUE nodes = rb_hash_aref(data, STR2SYM("nodes"));
535
+ VALUE ways = rb_hash_aref(data, STR2SYM("ways"));
536
+ VALUE relations = rb_hash_aref(data, STR2SYM("relations"));
537
+
538
+ for(size_t i = 0; i < primitive_block->n_primitivegroup; i++)
539
+ {
540
+ PrimitiveGroup *primitive_group = primitive_block->primitivegroup[i];
541
+
542
+ if(primitive_group->nodes)
543
+ process_nodes(nodes, primitive_group, string_table, lat_offset, lon_offset, granularity, ts_granularity);
544
+
545
+ if(primitive_group->dense)
546
+ process_dense_nodes(nodes, primitive_group->dense, string_table, lat_offset, lon_offset, granularity, ts_granularity);
547
+
548
+ if(primitive_group->ways)
549
+ process_ways(ways, primitive_group, string_table, ts_granularity);
550
+
551
+ if(primitive_group->relations)
552
+ process_relations(relations, primitive_group, string_table, ts_granularity);
553
+ }
554
+
555
+ rb_iv_set(obj, "@data", data);
556
+
557
+ primitive_block__free_unpacked(primitive_block, NULL);
558
+
559
+ return Qtrue;
560
+ }
561
+
562
+ static VALUE header_getter(VALUE obj)
563
+ {
564
+ return rb_iv_get(obj, "@header");
565
+ }
566
+
567
+ static VALUE data_getter(VALUE obj)
568
+ {
569
+ return rb_iv_get(obj, "@data");
570
+ }
571
+
572
+ static VALUE nodes_getter(VALUE obj)
573
+ {
574
+ VALUE data = rb_iv_get(obj, "@data");
575
+
576
+ return rb_hash_aref(data, STR2SYM("nodes"));
577
+ }
578
+
579
+ static VALUE ways_getter(VALUE obj)
580
+ {
581
+ VALUE data = rb_iv_get(obj, "@data");
582
+
583
+ return rb_hash_aref(data, STR2SYM("ways"));
584
+ }
585
+
586
+ static VALUE relations_getter(VALUE obj)
587
+ {
588
+ VALUE data = rb_iv_get(obj, "@data");
589
+
590
+ return rb_hash_aref(data, STR2SYM("relations"));
591
+ }
592
+
593
+ static VALUE iterate(VALUE obj)
594
+ {
595
+ if (!rb_block_given_p())
596
+ rb_raise(rb_eArgError, "A block is expected");
597
+
598
+ while(parse_osm_data(obj) != Qfalse)
599
+ {
600
+ VALUE nodes = nodes_getter(obj);
601
+ VALUE ways = ways_getter(obj);
602
+ VALUE relations = relations_getter(obj);
603
+
604
+ rb_yield_values(3, nodes, ways, relations);
605
+ }
606
+
607
+ return Qnil;
608
+ }
609
+
610
+ static VALUE initialize(VALUE obj, VALUE filename)
611
+ {
612
+ // Check that filename is a string
613
+ Check_Type(filename, T_STRING);
614
+
615
+ // Check if the file has a valid extension
616
+ if(!strcmp(".osm.pbf", StringValuePtr(filename) + RSTRING_LEN(filename)-8) == 0)
617
+ rb_raise(rb_eArgError, "Not a osm.pbf file");
618
+
619
+ // Try to open the given file
620
+ if(!(DATA_PTR(obj) = fopen(StringValuePtr(filename), "rb")))
621
+ rb_raise(rb_eIOError, "Unable to open the file");
622
+
623
+ // Store the filename
624
+ rb_iv_set(obj, "@filename", filename);
625
+
626
+ // Every osm.pbf file must have an OSMHeader at the beginning.
627
+ // Failing to find it means that the file is corrupt or invalid.
628
+ parse_osm_header(obj, DATA_PTR(obj));
629
+
630
+ // Parse the firts OSMData fileblock
631
+ parse_osm_data(obj);
632
+
633
+ return obj;
634
+ }
635
+
636
+ static VALUE alloc_file(VALUE klass)
637
+ {
638
+ FILE *input = NULL;
639
+
640
+ return Data_Wrap_Struct(klass, NULL, fclose, input);
641
+ }
642
+
643
+ static VALUE inspect(VALUE obj)
644
+ {
645
+ const char *cname = rb_obj_classname(obj);
646
+ return rb_sprintf("#<%s:%p>", cname, (void*)obj);
647
+ }
648
+
649
+ void Init_pbf_parser(void)
650
+ {
651
+ VALUE klass = rb_define_class("PbfParser", rb_cObject);
652
+
653
+ rb_define_alloc_func(klass, alloc_file);
654
+ rb_define_method(klass, "initialize", initialize, 1);
655
+ rb_define_method(klass, "inspect", inspect, 0);
656
+ rb_define_method(klass, "next", parse_osm_data, 0);
657
+ rb_define_method(klass, "each", iterate, 0);
658
+
659
+ // Getters
660
+ rb_define_method(klass, "header", header_getter, 0);
661
+ rb_define_method(klass, "data", data_getter, 0);
662
+ rb_define_method(klass, "nodes", nodes_getter, 0);
663
+ rb_define_method(klass, "ways", ways_getter, 0);
664
+ rb_define_method(klass, "relations", relations_getter, 0);
665
+ }
@@ -0,0 +1,32 @@
1
+ #ifndef PBF_PARSER_H
2
+ #define PBF_PARSER_H
3
+
4
+ #include <stdio.h>
5
+ #include <stdlib.h>
6
+ #include <ruby.h>
7
+
8
+ #ifdef HAVE_RUBY_ENCODING_H
9
+ #include <ruby/encoding.h>
10
+ #endif
11
+
12
+ #include "zlib.h"
13
+
14
+ #include "fileformat.pb-c.h"
15
+ #include "osmformat.pb-c.h"
16
+
17
/* Upper bounds, in bytes, accepted for a blob header and for a blob.
   The expansions are parenthesised so the macros stay a single value
   inside any surrounding expression. */
#define MAX_BLOB_HEADER_SIZE (64 * 1024)
#define MAX_BLOB_SIZE (32 * 1024 * 1024)
19
+
20
+ #define NANO_DEGREE .000000001
21
+
22
+ #define STR2SYM(str) ID2SYM(rb_intern(str))
23
+
24
+ /*
25
+ Deal with floating point accuracy problems while calculating lat and lon.
26
+ 8 decimal places gives a precision of ~1mm and AFAIK that's the maximum
27
+ precision used with OSM data (and most geo tools). */
28
+ #define FIX8(num) rb_funcall(num, rb_intern("round"), 1, INT2NUM(8))
29
+
30
+ void Init_pbf_parser(void);
31
+
32
+ #endif
data/lib/pbf_parser.rb ADDED
@@ -0,0 +1 @@
1
+ require 'pbf_parser/pbf_parser'