stata 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (C) 2011 by Unspace Interactive
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
20
+
data/README ADDED
@@ -0,0 +1,21 @@
1
+
2
+ to make the test C app, run:
3
+
4
+ cd ext
5
+ make -f Makefile_c -B
6
+ ./stata_c_test file
7
+
8
+ This compiles the application, reads file.dta (the ".dta" extension is appended to the argument automatically) and writes a copy of the same data to file_resave.dta.
9
+
10
+
11
+ to make the test Ruby app, run:
12
+
13
+ cd ext
14
+ ruby extconf.rb
15
+ make
16
+ ./test.rb file.dta
17
+
18
+ this will do the same, but using the Ruby extension
19
+
20
+
21
+ If the app finds any issues with the data, it will fail an assert, which will give you line numbers to the source.
@@ -0,0 +1,187 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ #### Start of system configuration section. ####
5
+
6
+ srcdir = .
7
+ topdir = /Users/aanand/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1
8
+ hdrdir = /Users/aanand/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1
9
+ arch_hdrdir = /Users/aanand/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1/$(arch)
10
+ VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
11
+ prefix = $(DESTDIR)/Users/aanand/.rvm/rubies/ruby-1.9.2-p136
12
+ rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
13
+ exec_prefix = $(prefix)
14
+ vendorhdrdir = $(rubyhdrdir)/vendor_ruby
15
+ sitehdrdir = $(rubyhdrdir)/site_ruby
16
+ rubyhdrdir = $(includedir)/$(RUBY_BASE_NAME)-$(ruby_version)
17
+ vendordir = $(rubylibprefix)/vendor_ruby
18
+ sitedir = $(rubylibprefix)/site_ruby
19
+ ridir = $(datarootdir)/$(RI_BASE_NAME)
20
+ mandir = $(datarootdir)/man
21
+ localedir = $(datarootdir)/locale
22
+ libdir = $(exec_prefix)/lib
23
+ psdir = $(docdir)
24
+ pdfdir = $(docdir)
25
+ dvidir = $(docdir)
26
+ htmldir = $(docdir)
27
+ infodir = $(datarootdir)/info
28
+ docdir = $(datarootdir)/doc/$(PACKAGE)
29
+ oldincludedir = $(DESTDIR)/usr/include
30
+ includedir = $(prefix)/include
31
+ localstatedir = $(prefix)/var
32
+ sharedstatedir = $(prefix)/com
33
+ sysconfdir = $(prefix)/etc
34
+ datadir = $(datarootdir)
35
+ datarootdir = $(prefix)/share
36
+ libexecdir = $(exec_prefix)/libexec
37
+ sbindir = $(exec_prefix)/sbin
38
+ bindir = $(exec_prefix)/bin
39
+ rubylibdir = $(rubylibprefix)/$(ruby_version)
40
+ archdir = $(rubylibdir)/$(arch)
41
+ sitelibdir = $(sitedir)/$(ruby_version)
42
+ sitearchdir = $(sitelibdir)/$(sitearch)
43
+ vendorlibdir = $(vendordir)/$(ruby_version)
44
+ vendorarchdir = $(vendorlibdir)/$(sitearch)
45
+
46
+ CC = gcc
47
+ CXX = g++
48
+ LIBRUBY = $(LIBRUBY_SO)
49
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
50
+ LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
51
+ LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
52
+ OUTFLAG = -o
53
+ COUTFLAG = -o
54
+
55
+ RUBY_EXTCONF_H =
56
+ cflags = $(optflags) $(debugflags) $(warnflags)
57
+ optflags = -O3
58
+ debugflags = -ggdb
59
+ warnflags = -Wextra -Wno-unused-parameter -Wno-parentheses -Wpointer-arith -Wwrite-strings -Wno-missing-field-initializers -Wshorten-64-to-32 -Wno-long-long
60
+ CFLAGS = -fno-common -isysroot /Developer/SDKs/MacOSX10.6.sdk -arch i386 -fno-common -pipe
61
+ INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
62
+ DEFS =
63
+ CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
64
+ CXXFLAGS = $(CFLAGS) $(cxxflags)
65
+ ldflags = -L. -Wl,-syslibroot /Developer/SDKs/MacOSX10.6.sdk -arch i386 -L/usr/local/lib
66
+ dldflags = -Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress -Wl,-flat_namespace
67
+ ARCH_FLAG =
68
+ DLDFLAGS = $(ldflags) $(dldflags)
69
+ LDSHARED = $(CC) -dynamic -bundle
70
+ LDSHAREDXX = $(CXX) -dynamic -bundle
71
+ AR = ar
72
+ EXEEXT =
73
+
74
+ RUBY_BASE_NAME = ruby
75
+ RUBY_INSTALL_NAME = ruby
76
+ RUBY_SO_NAME = ruby.1.9.1
77
+ arch = i386-darwin10.5.0
78
+ sitearch = $(arch)
79
+ ruby_version = 1.9.1
80
+ ruby = /Users/aanand/.rvm/rubies/ruby-1.9.2-p136/bin/ruby
81
+ RUBY = $(ruby)
82
+ RM = rm -f
83
+ RM_RF = $(RUBY) -run -e rm -- -rf
84
+ RMDIRS = $(RUBY) -run -e rmdir -- -p
85
+ MAKEDIRS = mkdir -p
86
+ INSTALL = /usr/bin/install -c
87
+ INSTALL_PROG = $(INSTALL) -m 0755
88
+ INSTALL_DATA = $(INSTALL) -m 644
89
+ COPY = cp
90
+
91
+ #### End of system configuration section. ####
92
+
93
+ preload =
94
+
95
+ libpath = . $(libdir)
96
+ LIBPATH = -L. -L$(libdir)
97
+ DEFFILE =
98
+
99
+ CLEANFILES = mkmf.log
100
+ DISTCLEANFILES =
101
+ DISTCLEANDIRS =
102
+
103
+ extout =
104
+ extout_prefix =
105
+ target_prefix =
106
+ LOCAL_LIBS =
107
+ LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lobjc
108
+ SRCS = Read.c Read.rb.c Stata.c Stata.rb.c Write.c Write.rb.c
109
+ OBJS = Read.o Read.rb.o Stata.o Stata.rb.o Write.o Write.rb.o
110
+ TARGET = Stata
111
+ DLLIB = $(TARGET).bundle
112
+ EXTSTATIC =
113
+ STATIC_LIB =
114
+
115
+ BINDIR = $(bindir)
116
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
117
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
118
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
119
+ HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
120
+ ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
121
+
122
+ TARGET_SO = $(DLLIB)
123
+ CLEANLIBS = $(TARGET).bundle
124
+ CLEANOBJS = *.o *.bak
125
+
126
+ all: $(DLLIB)
127
+ static: $(STATIC_LIB)
128
+ .PHONY: all install static install-so install-rb
129
+ .PHONY: clean clean-so clean-rb
130
+
131
+ clean-rb-default::
132
+ clean-rb::
133
+ clean-so::
134
+ clean: clean-so clean-rb-default clean-rb
135
+ @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
136
+
137
+ distclean-rb-default::
138
+ distclean-rb::
139
+ distclean-so::
140
+ distclean: clean distclean-so distclean-rb-default distclean-rb
141
+ @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
142
+ @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
143
+ @-$(RMDIRS) $(DISTCLEANDIRS)
144
+
145
+ realclean: distclean
146
+ install: install-so install-rb
147
+
148
+ install-so: $(RUBYARCHDIR)
149
+ install-so: $(RUBYARCHDIR)/$(DLLIB)
150
+ $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
151
+ @-$(MAKEDIRS) $(@D)
152
+ $(INSTALL_PROG) $(DLLIB) $(@D)
153
+ install-rb: pre-install-rb install-rb-default
154
+ install-rb-default: pre-install-rb-default
155
+ pre-install-rb: Makefile
156
+ pre-install-rb-default: Makefile
157
+ $(RUBYARCHDIR):
158
+ $(MAKEDIRS) $@
159
+
160
+ site-install: site-install-so site-install-rb
161
+ site-install-so: install-so
162
+ site-install-rb: install-rb
163
+
164
+ .SUFFIXES: .c .m .cc .cxx .cpp .C .o
165
+
166
+ .cc.o:
167
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
168
+
169
+ .cxx.o:
170
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
171
+
172
+ .cpp.o:
173
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
174
+
175
+ .C.o:
176
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
177
+
178
+ .c.o:
179
+ $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
180
+
181
+ $(DLLIB): $(OBJS) Makefile
182
+ @-$(RM) $(@)
183
+ $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
184
+
185
+
186
+
187
+ $(OBJS): $(hdrdir)/ruby.h $(hdrdir)/ruby/defines.h $(arch_hdrdir)/ruby/config.h
@@ -0,0 +1,9 @@
1
+
2
# Builds the stand-alone C test program from the reader/writer sources.
#
#   make -f Makefile_c               build stata_c_test
#   make -f Makefile_c STD=-std=c99  pass an explicit language standard
#   make -f Makefile_c CC=clang      pick another compiler
#   make -f Makefile_c clean         remove the build products

OBJS   = Read.o Write.o Stata.o
STD    =
TARGET = stata_c_test

# `all` and `clean` are commands, not files.
.PHONY: all clean

all: $(TARGET)

# The executable is a real target, so it is only relinked when an object changes.
$(TARGET): $(OBJS)
	$(CC) $(STD) $(OBJS) -Wall -o $@

%.o: %.c
	$(CC) $(STD) -Wall $< -c -o $@

# Every object includes at least one of the project headers.
$(OBJS): Read.h Stata.h Write.h

clean:
	$(RM) $(OBJS) $(TARGET)
@@ -0,0 +1,135 @@
1
+
2
+ #include "Read.h"
3
+ #include "Stata.h"
4
+ #include <math.h>
5
+ #include <stdio.h>
6
+ #include <stdlib.h>
7
+ #include <string.h>
8
+
9
+ int16_t read_int16_t() { int16_t t=0; fread(&t, sizeof(t), 1, fp); return ((swap_endian_needed==1) ? ((t>>8) | (t<<8)) : t); }
10
+ int32_t read_int32_t() { int32_t t=0; fread(&t, sizeof(t), 1, fp); return (swap_endian_needed==1) ? (int32_t)__builtin_bswap32(t) : t; }
11
+
12
+ uint16_t read_uint16_t() { uint16_t t=0; fread(&t, sizeof(t), 1, fp); return (swap_endian_needed==1) ? (t>>8)|(t<<8) : t; }
13
+ uint32_t read_uint32_t() { uint32_t t=0; fread(&t, sizeof(t), 1, fp); return (swap_endian_needed==1) ? __builtin_bswap32(t) : t; }
14
+
15
+ char * read_string(int length) { char * t = (char*)malloc(length+1); fread(t, length, 1, fp); t[length] = 0; return t; }
16
+ char ** read_strings(int num, int length) { char ** t = (char **)malloc(sizeof(char *)*num); int i; for (i = 0 ; i < num ; i++) t[i] = read_string(length); return t; }
17
+
18
+ float read_float_t() { uint32_t t=0; fread(&t, sizeof(t), 1, fp); if (swap_endian_needed==1) t = __builtin_bswap32(t); return *((float *)(void *)&t); }
19
+ double read_double_t() { uint64_t t=0; fread(&t, sizeof(t), 1, fp); if (swap_endian_needed==1) t = __builtin_bswap64(t); return *((double *)(void *)&t); }
20
+
21
+ struct stata_file * read_stata_file(char * filename)
22
+ {
23
+ if (verbose) printf("read file '%s'\n", filename);
24
+
25
+ long i,j;
26
+
27
+ struct stata_file * f = (struct stata_file *)malloc(sizeof(struct stata_file));
28
+ memset(f, 0, sizeof(struct stata_file));
29
+ f->filename = (char*)malloc(strlen(filename)+1);
30
+ strcpy(f->filename, filename);
31
+
32
+ fp = fopen(f->filename, "rb");
33
+ if (fp == NULL) { set_error(f, "error reading file"); return f; }
34
+
35
+ /*fseek(fp, 0 , SEEK_END);
36
+ long lSize = ftell(fp);
37
+ rewind(fp);
38
+ printf("file is %ld bytes long\n", lSize);*/
39
+
40
+
41
+ /* 5.1 Header */
42
+ f->ds_format = fgetc(fp); if (f->ds_format != 0x72) { set_error(f, "invalid file ds_format"); return f; }
43
+ f->byteorder = fgetc(fp); if (f->byteorder != 0x01 && f->byteorder != 0x02) { set_error(f, "invalid file byteorder"); return f; }
44
+ if (f->byteorder != get_host_endian()) swap_endian_needed = 1;
45
+ f->filetype = fgetc(fp); if (f->filetype != 0x01) { set_error(f, "invalid file filetype"); return f; }
46
+ f->unused = fgetc(fp); if (f->unused != 0x00) { set_error(f, "invalid unused values"); return f; }
47
+ f->nvar = read_uint16_t(); if (f->nvar <= 0) { set_error(f, "invalid nvar (< 1)"); return f; }
48
+ f->nobs = read_uint32_t(); if (f->nobs <= 0) { set_error(f, "invalid nobs (< 1)"); return f; }
49
+ fread(&f->data_label, sizeof(f->data_label), 1, fp);
50
+ fread(&f->time_stamp, sizeof(f->time_stamp), 1, fp);
51
+
52
+
53
+ /* 5.2 Descriptors */
54
+ f->typlist = (uint8_t *)malloc(f->nvar);
55
+ fread(f->typlist, 1, f->nvar, fp);
56
+ f->varlist = read_strings(f->nvar, 33);
57
+ f->srtlist = (uint16_t *)malloc(sizeof(uint16_t)*(f->nvar+1));
58
+ for (i = 0 ; i <= f->nvar ; i++) f->srtlist[i] = read_uint16_t();
59
+ f->fmtlist = read_strings(f->nvar, 49);
60
+ f->lbllist = read_strings(f->nvar, 33);
61
+
62
+
63
+ /* 5.3 Variable Labels */
64
+ f->variable_labels = read_strings(f->nvar, 81);
65
+
66
+
67
+ /* 5.4 Expansion Fields */
68
+ uint8_t data_type;
69
+ uint32_t len;
70
+ do {
71
+ data_type = fgetc(fp);
72
+ if (fread(&len, 4, 1, fp) != 1) { set_error(f, "fread from file failed"); return f; }
73
+ if (len > 0) for (i = 0 ; i < len ; i++) fgetc(fp);
74
+ } while(data_type != 0 || len != 0 || feof(fp));
75
+ /*printf ("read %d bytes of expansion fields\n", count);*/
76
+
77
+
78
+ /* 5.5 Data */
79
+ /*printf(" read 5.5 Data (%dx%d)\n", f->nobs, f->nvar);*/
80
+ f->obs = (struct stata_obs *)malloc(sizeof(struct stata_obs)*f->nobs);
81
+ for (j = 0 ; j < f->nobs ; j++)
82
+ {
83
+ f->obs[j].var = (struct stata_var *)malloc(sizeof(struct stata_var)*f->nvar);
84
+ for (i = 0 ; i < f->nvar ; i++)
85
+ {
86
+ struct stata_var * var = &f->obs[j].var[i];
87
+ memset(var, 0, sizeof(struct stata_var));
88
+
89
+ if (f->typlist[i] != 0 &&
90
+ f->typlist[i] < 245) { var->v_type = V_STR; var->v_str = read_string(f->typlist[i]); if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
91
+ else if (f->typlist[i] == 251) { var->v_type = V_BYTE; if (fread(&var->v_byte, sizeof(var->v_byte), 1, fp) != 1) { set_error(f, "fread from file failed"); }; if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
92
+ else if (f->typlist[i] == 252) { var->v_type = V_INT; var->v_int = read_int16_t(); if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
93
+ else if (f->typlist[i] == 253) { var->v_type = V_LONG; var->v_long = read_int32_t(); if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
94
+ else if (f->typlist[i] == 254) { var->v_type = V_FLOAT; var->v_float = read_float_t(); if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
95
+ else if (f->typlist[i] == 255) { var->v_type = V_DOUBLE; var->v_double = read_double_t(); if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
96
+ else fprintf(stderr, "error.\n");
97
+
98
+ if (ferror(fp)) perror("error occurred");
99
+ if (feof(fp)) { fprintf(stderr, "eof error at var %ld (error:%d)\n", i, ferror(fp)); break; }
100
+ }
101
+ if (feof(fp)) { fprintf(stderr, "eof error at obs %ld (error:%d)\n", j, ferror(fp)); exit(1); }
102
+ }
103
+
104
+
105
+ /* 5.6 Value labels */
106
+ if (!feof(fp))
107
+ {
108
+ while (!feof(fp))
109
+ {
110
+ int32_t len = read_int32_t();
111
+ if (feof(fp) || len == 0) break;
112
+
113
+ f->num_vlt++;
114
+ f->vlt = (struct stata_vlt *)realloc(f->vlt, sizeof(struct stata_vlt)*f->num_vlt);
115
+ struct stata_vlt * vlt = &f->vlt[f->num_vlt-1];
116
+
117
+ if (fread(vlt->name, 33, 1, fp) != 1) { set_error(f, "fread from file failed"); return f; };
118
+ fgetc(fp); fgetc(fp); fgetc(fp); /* padding */
119
+
120
+ vlt->n = read_int32_t();
121
+ vlt->txtlen = read_int32_t(); if (vlt->txtlen >= 32000) { set_error(f, "value label table txtlen is > 32000"); return f; };
122
+ vlt->off = (int32_t*)malloc(sizeof(int32_t)*vlt->n);
123
+ for (i = 0 ; i < vlt->n ; i++) vlt->off[i] = read_int32_t();
124
+ vlt->val = (int32_t*)malloc(sizeof(int32_t)*vlt->n);
125
+ for (i = 0 ; i < vlt->n ; i++) vlt->val[i] = read_int32_t();
126
+ vlt->txtbuf = (char*)malloc(vlt->txtlen);
127
+ fread(vlt->txtbuf, vlt->txtlen, 1, fp);
128
+ }
129
+ }
130
+ /*printf(" read 5.6 Value labels (%d)\n", f->num_vlt);*/
131
+
132
+
133
+ fclose(fp);
134
+ return f;
135
+ }
@@ -0,0 +1,23 @@
1
+
2
#ifndef STATA_READ_H
#define STATA_READ_H

#include <inttypes.h>

/*
 * Primitive readers. Each reads from the global stream `fp` (declared in
 * Stata.h) and byte-swaps when `swap_endian_needed` is 1. The prototypes use
 * (void) so that a call with stray arguments is rejected at compile time.
 */
extern int16_t read_int16_t(void);
extern int32_t read_int32_t(void);

extern uint16_t read_uint16_t(void);
extern uint32_t read_uint32_t(void);
/* NOTE(review): no definition of read_uint64_t is visible in Read.c; confirm
 * one exists before calling it, otherwise the link will fail. */
extern uint64_t read_uint64_t(void);

/* Read one fixed-width field of `length` bytes into a new NUL-terminated buffer. */
extern char * read_string(int length);
/* Read `num` consecutive fields of `length` bytes each into a new array. */
extern char ** read_strings(int num, int length);

extern float read_float_t(void);
extern double read_double_t(void);

struct stata_file;
/* Parse `filename`; the result carries an error message in `error` on failure. */
extern struct stata_file * read_stata_file(char * filename);

#endif
Binary file
@@ -0,0 +1,168 @@
1
+
2
+ #include <math.h>
3
+ #include <ruby.h>
4
+ #include "Stata.h"
5
+ #include "Read.h"
6
+
7
/*
 * ENCODED_STR_NEW2(str, encoding): build a Ruby String from the C string
 * `str`. When ruby/encoding.h is available the string is tagged with the
 * encoding named by `encoding`; otherwise the encoding argument is ignored.
 *
 * The encoding-aware variant is a plain function rather than a GNU
 * statement expression, so it compiles with any C compiler and evaluates
 * each argument exactly once.
 */
#ifdef HAVE_RUBY_ENCODING_H
# include <ruby/encoding.h>
static VALUE stata_encoded_str_new2(const char * str, const char * encoding)
{
    VALUE string = rb_str_new2(str);
    rb_enc_associate_index(string, rb_enc_find_index(encoding));
    return string;
}
# define ENCODED_STR_NEW2(str, encoding) \
    stata_encoded_str_new2((const char *)(str), (encoding))
#else
# define ENCODED_STR_NEW2(str, encoding) \
    rb_str_new2((const char *)(str))
#endif
20
+
21
+ VALUE method_read(VALUE self, VALUE file)
22
+ {
23
+ long i,j;
24
+
25
+ if (TYPE(file) != T_STRING) rb_raise(rb_eArgError, "filename is not a string, but instead of type '%d' (in C)", TYPE(file));
26
+
27
+ struct stata_file * f = read_stata_file(rb_string_value_cstr(&file));
28
+ if (f == NULL) rb_raise(rb_eRuntimeError, "Read Error");
29
+ if (f->error) rb_raise(rb_eRuntimeError, "%s", f->error);
30
+
31
+
32
+ /* 5.1 Headers */
33
+ VALUE r = rb_hash_new();
34
+ rb_hash_aset(r, ENCODED_STR_NEW2("file_name", "ASCII-8BIT"), ENCODED_STR_NEW2(f->filename, "ASCII-8BIT"));
35
+ rb_hash_aset(r, ENCODED_STR_NEW2("data_label", "ASCII-8BIT"), ENCODED_STR_NEW2(f->data_label, "ASCII-8BIT"));
36
+ rb_hash_aset(r, ENCODED_STR_NEW2("time_stamp", "ASCII-8BIT"), ENCODED_STR_NEW2(f->time_stamp, "ASCII-8BIT"));
37
+ rb_hash_aset(r, ENCODED_STR_NEW2("nvar", "ASCII-8BIT"), INT2NUM(f->nvar));
38
+ rb_hash_aset(r, ENCODED_STR_NEW2("nobs", "ASCII-8BIT"), INT2NUM(f->nobs));
39
+
40
+ VALUE data = rb_ary_new();
41
+ for (i = 0 ; i < f->nobs ; i++)
42
+ {
43
+ VALUE row = rb_ary_new();
44
+ for (j = 0 ; j < f->nvar ; j++)
45
+ {
46
+ char symbol_name[100];
47
+ sprintf(symbol_name, "%d", f->obs[i].var[j].v_type);
48
+ VALUE var = Qnil;
49
+ sprintf(symbol_name, "dot_");
50
+
51
+ if (f->obs[i].var[j].v_type == V_STR && f->obs[i].var[j].v_str != NULL)
52
+ var = ENCODED_STR_NEW2(f->obs[i].var[j].v_str, "ASCII-8BIT");
53
+ else if (f->obs[i].var[j].v_type == V_BYTE)
54
+ {
55
+ if (f->obs[i].var[j].v_byte >= -127 && f->obs[i].var[j].v_byte <= 100)
56
+ var = INT2NUM((int)f->obs[i].var[j].v_byte);
57
+ else if (f->obs[i].var[j].v_byte > 100)
58
+ {
59
+ int dot = f->obs[i].var[j].v_byte - 101;
60
+ if (dot == 0) symbol_name[3] = 0;
61
+ else symbol_name[4] = dot+96;
62
+ symbol_name[5] = 0;
63
+ var = rb_str_intern(ENCODED_STR_NEW2(symbol_name, "ASCII-8BIT"));
64
+ }
65
+ }
66
+ else if (f->obs[i].var[j].v_type == V_INT)
67
+ {
68
+ if (f->obs[i].var[j].v_int >= -32767 && f->obs[i].var[j].v_int <= 32740)
69
+ var = INT2NUM((int)f->obs[i].var[j].v_int);
70
+ else if (f->obs[i].var[j].v_int > 32740)
71
+ {
72
+ int dot = f->obs[i].var[j].v_int - 32741;
73
+ if (dot == 0) symbol_name[3] = 0;
74
+ else symbol_name[4] = dot+96;
75
+ symbol_name[5] = 0;
76
+ var = rb_str_intern(ENCODED_STR_NEW2(symbol_name, "ASCII-8BIT"));
77
+ }
78
+ }
79
+ else if (f->obs[i].var[j].v_type == V_LONG)
80
+ {
81
+ if (f->obs[i].var[j].v_long >= -2147483647 && f->obs[i].var[j].v_long <= 2147483620)
82
+ var = LONG2NUM((int)f->obs[i].var[j].v_long);
83
+ else if (f->obs[i].var[j].v_long > 2147483620)
84
+ {
85
+ int dot = f->obs[i].var[j].v_long - 2147483621;
86
+ if (dot == 0) symbol_name[3] = 0;
87
+ else symbol_name[4] = dot+96;
88
+ symbol_name[5] = 0;
89
+ var = rb_str_intern(ENCODED_STR_NEW2(symbol_name, "ASCII-8BIT"));
90
+ }
91
+ }
92
+ else if (f->obs[i].var[j].v_type == V_FLOAT)
93
+ {
94
+ if (f->obs[i].var[j].v_float < pow(2, 127))
95
+ {
96
+ var = rb_float_new(f->obs[i].var[j].v_float);
97
+ }
98
+ else if (f->obs[i].var[j].v_float >= pow(2, 127))
99
+ {
100
+ int dot = (f->obs[i].var[j].v_float - (float)pow(2, 127)) / (float)pow(2, 115);
101
+ if (dot == 0) symbol_name[3] = 0;
102
+ else symbol_name[4] = dot+96;
103
+ symbol_name[5] = 0;
104
+ var = rb_str_intern(ENCODED_STR_NEW2(symbol_name, "ASCII-8BIT"));
105
+ }
106
+ }
107
+ else if (f->obs[i].var[j].v_type == V_DOUBLE)
108
+ {
109
+ if (f->obs[i].var[j].v_double < pow(2, 1023))
110
+ {
111
+ var = rb_float_new(f->obs[i].var[j].v_double);
112
+ }
113
+ else if (f->obs[i].var[j].v_double >= pow(2, 1023))
114
+ {
115
+ int dot = (int)((f->obs[i].var[j].v_double - pow(2, 1023)) / pow(2, 1011));
116
+ if (dot == 0) symbol_name[3] = 0;
117
+ else symbol_name[4] = dot+96;
118
+ symbol_name[5] = 0;
119
+ var = rb_str_intern(ENCODED_STR_NEW2(symbol_name, "ASCII-8BIT"));
120
+ }
121
+ }
122
+
123
+ rb_ary_push(row, var);
124
+ }
125
+ rb_ary_push(data, row);
126
+ }
127
+ rb_hash_aset(r, ENCODED_STR_NEW2("data", "ASCII-8BIT"), data);
128
+
129
+
130
+ VALUE fields = rb_ary_new();
131
+ for (i = 0 ; i < f->nvar ; i++)
132
+ {
133
+ VALUE field = rb_hash_new();
134
+ rb_hash_aset(field, ENCODED_STR_NEW2("id", "ASCII-8BIT"), INT2NUM(i+1));
135
+ rb_hash_aset(field, ENCODED_STR_NEW2("type", "ASCII-8BIT"), INT2NUM(f->typlist[i]));
136
+ rb_hash_aset(field, ENCODED_STR_NEW2("name", "ASCII-8BIT"), ENCODED_STR_NEW2(f->varlist[i], "ASCII-8BIT"));
137
+ rb_hash_aset(field, ENCODED_STR_NEW2("format", "ASCII-8BIT"), ENCODED_STR_NEW2(f->fmtlist[i], "ASCII-8BIT"));
138
+ rb_hash_aset(field, ENCODED_STR_NEW2("variable_label", "ASCII-8BIT"), ENCODED_STR_NEW2(f->variable_labels[i], "ASCII-8BIT"));
139
+ rb_hash_aset(field, ENCODED_STR_NEW2("value_label", "ASCII-8BIT"), ENCODED_STR_NEW2(f->lbllist[i], "ASCII-8BIT"));
140
+ rb_hash_aset(field, ENCODED_STR_NEW2("sort", "ASCII-8BIT"), INT2NUM(f->srtlist[i]));
141
+ rb_ary_push(fields, field);
142
+ }
143
+ rb_hash_aset(r, ENCODED_STR_NEW2("fields", "ASCII-8BIT"), fields);
144
+
145
+
146
+ VALUE vlt = rb_ary_new();
147
+ for (i = 0 ; i < f->num_vlt ; i++)
148
+ {
149
+ VALUE v = rb_hash_new();
150
+ rb_hash_aset(v, ENCODED_STR_NEW2("name", "ASCII-8BIT"), ENCODED_STR_NEW2(f->vlt[i].name, "ASCII-8BIT"));
151
+ VALUE table = rb_ary_new();
152
+ for (j = 0 ; j < f->vlt[i].n ; j++)
153
+ {
154
+ VALUE row = rb_ary_new();
155
+ rb_ary_push(row, INT2NUM(f->vlt[i].val[j]));
156
+ rb_ary_push(row, ENCODED_STR_NEW2(f->vlt[i].txtbuf + f->vlt[i].off[j], "ASCII-8BIT"));
157
+
158
+ rb_ary_push(table, row);
159
+ }
160
+
161
+ rb_hash_aset(v, ENCODED_STR_NEW2("table", "ASCII-8BIT"), table);
162
+ rb_ary_push(vlt, v);
163
+ }
164
+ rb_hash_aset(r, ENCODED_STR_NEW2("value_labels", "ASCII-8BIT"), vlt);
165
+
166
+ free_stata(f);
167
+ return r;
168
+ }
Binary file
Binary file
@@ -0,0 +1,81 @@
1
+
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <math.h>
5
+ #include <string.h>
6
+ #include <inttypes.h>
7
+
8
+ #include "Stata.h"
9
+ #include "Read.h"
10
+ #include "Write.h"
11
+
12
+ FILE *fp;
13
+ int swap_endian_needed = 0;
14
+ int verbose = 0;
15
+
16
/*
 * Report the byte order of the machine running this code, using the same
 * codes as the file header's byteorder field: 0x02 when the least
 * significant byte of an int is stored first, 0x01 otherwise.
 */
char get_host_endian()
{
    const int probe = 1;
    const char * first_byte = (const char *)&probe;

    if (*first_byte == 1)
        return 0x02;
    return 0x01;
}
22
+
23
+ int set_error(struct stata_file * f, const char * error)
24
+ {
25
+ f->error = malloc(strlen(error) + 1);
26
+ strcpy(f->error, error);
27
+ return 0;
28
+ }
29
+
30
+ void free_stata(struct stata_file * f)
31
+ {
32
+ long i,j;
33
+
34
+ free(f->filename);
35
+ free(f->error);
36
+ for (i = 0 ; i < f->nvar ; i++) free(f->varlist[i]);
37
+ for (i = 0 ; i < f->nvar ; i++) free(f->fmtlist[i]);
38
+ for (i = 0 ; i < f->nvar ; i++) free(f->lbllist[i]);
39
+ for (i = 0 ; i < f->nvar ; i++) free(f->variable_labels[i]);
40
+ free(f->typlist);
41
+ free(f->varlist);
42
+ free(f->srtlist);
43
+ free(f->fmtlist);
44
+ free(f->lbllist);
45
+ free(f->variable_labels);
46
+
47
+ for (i = 0 ; i < f->nobs ; i++)
48
+ {
49
+ for (j = 0 ; j < f->nvar ; j++)
50
+ if (f->obs[i].var[j].v_type == V_STR)
51
+ free(f->obs[i].var[j].v_str);
52
+ free(f->obs[i].var);
53
+ }
54
+ free(f->obs);
55
+
56
+ for (i = 0 ; i < f->num_vlt ; i++)
57
+ {
58
+ free(f->vlt[i].off);
59
+ free(f->vlt[i].val);
60
+ free(f->vlt[i].txtbuf);
61
+ }
62
+ free(f->vlt);
63
+
64
+ free(f);
65
+ }
66
+
67
+ int main(int argc, char *argv[])
68
+ {
69
+ if (argc != 2) { fprintf(stderr, "usage:\n ./read [file]\n"); return(0); }
70
+
71
+ verbose = 1;
72
+
73
+ char src_file[100];
74
+ char dst_file[100];
75
+ sprintf(src_file, "%s.dta", argv[1]);
76
+ sprintf(dst_file, "%s_resave.dta", argv[1]);
77
+
78
+ struct stata_file * f = read_stata_file(src_file);
79
+ write_stata_file(dst_file, f);
80
+ return 0;
81
+ }
@@ -0,0 +1,72 @@
1
+
2
#ifndef STATA_H
#define STATA_H

#include <stdio.h>
#include <inttypes.h>

/* State shared by the reader and the writer. */
extern FILE * fp;                 /* stream of the file currently being read or written */
extern int swap_endian_needed;    /* 1 when the file's byte order differs from the host's */
extern int verbose;               /* non-zero: print a line for every file read or written */

/* Storage type of a single value; V_INVALID is 0, the state of zeroed memory. */
enum stata_type {
    V_INVALID, V_STR, V_BYTE, V_INT, V_LONG, V_FLOAT, V_DOUBLE
};

/* One value of one observation; `v_type` selects the active union member. */
struct stata_var {
    enum stata_type v_type;
    union {
        char * v_str;      /* heap string, released by free_stata */
        int8_t v_byte;
        int16_t v_int;
        int32_t v_long;
        float v_float;
        double v_double;
    };
};

/* One observation (row): an array of nvar values. */
struct stata_obs {
    struct stata_var * var;
};

/* One value label table. */
struct stata_vlt {
    char name[33];
    int32_t n;         /* number of entries in off[] and val[] */
    int32_t txtlen;    /* number of bytes in txtbuf */
    int32_t * off;     /* off[i]: offset into txtbuf of the text for val[i] */
    int32_t * val;
    char * txtbuf;
};

/* In-memory image of a .dta file. */
struct stata_file {
    char * filename;

    int8_t ds_format;      /* the reader accepts only 0x72 */
    int8_t byteorder;      /* 0x01 or 0x02, compared with get_host_endian() */
    int8_t filetype;       /* the reader accepts only 0x01 */
    int8_t unused;         /* the reader accepts only 0x00 */
    uint16_t nvar;         /* number of variables (columns) */
    uint32_t nobs;         /* number of observations (rows) */
    char data_label[81];
    char time_stamp[18];

    char * error;          /* NULL, or the message stored by set_error */

    uint8_t * typlist;     /* nvar type codes: 1..244 string width, 251 byte, 252 int, 253 long, 254 float, 255 double */
    char ** varlist;       /* nvar variable names (33-byte fields) */
    uint16_t * srtlist;    /* nvar+1 entries */
    char ** fmtlist;       /* nvar formats (49-byte fields) */
    char ** lbllist;       /* nvar value label names (33-byte fields) */
    char ** variable_labels;   /* nvar variable labels (81-byte fields) */

    struct stata_obs * obs;    /* nobs observations */

    int num_vlt;
    struct stata_vlt * vlt;    /* num_vlt value label tables */
};

/* Byte-order code of the host, in the same encoding as `byteorder`. */
extern char get_host_endian(void);
/* Store a copy of `error` in f->error; always returns 0. */
extern int set_error(struct stata_file * f, const char * error);
/* Release `f` and everything it owns. */
extern void free_stata(struct stata_file * f);

#endif
Binary file
@@ -0,0 +1,28 @@
1
+
2
+ #include <ruby.h>
3
+ #include "Stata.h"
4
+
5
+ VALUE method_read(VALUE self, VALUE file);
6
+ VALUE method_write(VALUE self, VALUE file);
7
+ VALUE method_get_verbose(VALUE self);
8
+ VALUE method_set_verbose(VALUE self, VALUE value);
9
+
10
+ void Init_Stata()
11
+ {
12
+ VALUE Stata_module = rb_define_module("Stata");
13
+ rb_define_singleton_method(Stata_module, "read", method_read, 1);
14
+ rb_define_singleton_method(Stata_module, "write", method_write, 2);
15
+ rb_define_singleton_method(Stata_module, "verbose", method_get_verbose, 0);
16
+ rb_define_singleton_method(Stata_module, "verbose=", method_set_verbose, 1);
17
+ }
18
+
19
+ VALUE method_get_verbose(VALUE self)
20
+ {
21
+ return verbose ? Qtrue : Qfalse;
22
+ }
23
+
24
+ VALUE method_set_verbose(VALUE self, VALUE value)
25
+ {
26
+ verbose = RTEST(value);
27
+ return method_get_verbose(self);
28
+ }
Binary file
@@ -0,0 +1,114 @@
1
+
2
+ #include "Write.h"
3
+ #include "Stata.h"
4
+ #include <stdio.h>
5
+ #include <stdlib.h>
6
+ #include <string.h>
7
+ #include <math.h>
8
+
9
+ struct stata_file;
10
+
11
+ int write_stata_file(char * filename, struct stata_file * f)
12
+ {
13
+ if (verbose) printf("write file '%s'\n", filename);
14
+
15
+ long i,j;
16
+
17
+ if (f == NULL) return 0;
18
+
19
+ fp = fopen(filename, "wb");
20
+ if (fp == NULL) return set_error(f, "error opening file");
21
+
22
+ if (f->nvar <= 0) return set_error(f, "nvar should be more then 0");
23
+ if (f->nobs <= 0) return set_error(f, "nobs should be more then 0");
24
+
25
+ /* 5.1 Headers */
26
+ char header[4] = {0x72, get_host_endian(), 0x01, 0x00};
27
+ if (fwrite(header, 4, 1, fp) != 1) return set_error(f, "fwrite to file failed");
28
+ if (fwrite(&f->nvar, sizeof(f->nvar), 1, fp) != 1) return set_error(f, "fwrite to file failed");
29
+ if (fwrite(&f->nobs, sizeof(f->nobs), 1, fp) != 1) return set_error(f, "fwrite to file failed");
30
+ if (fwrite(f->data_label, sizeof(f->data_label), 1, fp) != 1) return set_error(f, "fwrite to file failed");
31
+ f->time_stamp[17] = 0;
32
+ if (fwrite(f->time_stamp, sizeof(f->time_stamp), 1, fp) != 1) return set_error(f, "fwrite to file failed");
33
+
34
+
35
+ /* 5.2 Descriptors */
36
+ if (fwrite(f->typlist, 1, f->nvar, fp) != f->nvar) return set_error(f, "fwrite to file failed");
37
+ for (i = 0 ; i < f->nvar ; i++) if (fwrite(f->varlist[i], 33, 1, fp) != 1) return set_error(f, "fwrite to file failed");
38
+ if (fwrite(f->srtlist, 2, f->nvar+1, fp) != (unsigned int)f->nvar+1) return set_error(f, "fwrite to file failed");
39
+ for (i = 0 ; i < f->nvar ; i++) if (fwrite(f->fmtlist[i], 49, 1, fp) != 1) return set_error(f, "fwrite to file failed");
40
+ for (i = 0 ; i < f->nvar ; i++) if (fwrite(f->lbllist[i], 33, 1, fp) != 1) return set_error(f, "fwrite to file failed");
41
+
42
+
43
+ /* 5.3 Variable Labels */
44
+ for (i = 0 ; i < f->nvar ; i++) if (fwrite(f->variable_labels[i], 81, 1, fp) != 1) return set_error(f, "fwrite to file failed");
45
+
46
+
47
+ /* 5.4 Expansion Fields */
48
+ char zeros[5] = {0,0,0,0,0};
49
+ if (fwrite(zeros, 5, 1, fp) != 1) return set_error(f, "fwrite to file failed");
50
+
51
+
52
+ /* 5.5 Data */
53
+ /*printf(" write 5.5 Data (%dx%d)\n", f->nobs, f->nvar);*/
54
+ for (j = 0 ; j < f->nobs ; j++)
55
+ {
56
+ for (i = 0 ; i < f->nvar ; i++)
57
+ {
58
+ struct stata_var * var = &f->obs[j].var[i];
59
+ if (f->typlist[i] != 0 && f->typlist[i] < 245) { if (fwrite(var->v_str, f->typlist[i], 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
60
+ else if (f->typlist[i] == 251) { if (fwrite(&var->v_byte, sizeof(var->v_byte), 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
61
+ else if (f->typlist[i] == 252) { if (fwrite(&var->v_int, sizeof(var->v_int), 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
62
+ else if (f->typlist[i] == 253) { if (fwrite(&var->v_long, sizeof(var->v_long), 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
63
+ else if (f->typlist[i] == 254) { if (fwrite(&var->v_float, sizeof(var->v_float), 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
64
+ else if (f->typlist[i] == 255) { if (fwrite(&var->v_double, sizeof(var->v_double), 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
65
+ }
66
+ }
67
+
68
+
69
+ /* 5.6 Value Labels */
70
+ /*printf(" write 5.6 Value Labels (%d)\n", f->num_vlt);*/
71
+ for (i = 0 ; i < f->num_vlt ; i++)
72
+ {
73
+ struct stata_vlt * vlt = &f->vlt[i];
74
+ uint32_t len = 4 + 4 + 4*vlt->n + 4*vlt->n + vlt->txtlen;
75
+ if (fwrite(&len, sizeof(len), 1, fp) != 1) return set_error(f, "fwrite to file failed");
76
+ if (fwrite(vlt->name, 33, 1, fp) != 1) return set_error(f, "fwrite to file failed");
77
+ if (fwrite(zeros, 3, 1, fp) != 1) return set_error(f, "fwrite to file failed");
78
+ if (fwrite(&vlt->n, sizeof(vlt->n), 1, fp) != 1) return set_error(f, "fwrite to file failed");
79
+ uint32_t txtlen = 0;
80
+ for (j = 0 ; j < vlt->n ; j++)
81
+ txtlen += (int)strlen(vlt->txtbuf + vlt->off[j]) + 1;
82
+
83
+ if (fwrite(&txtlen, sizeof(txtlen), 1, fp) != 1) return set_error(f, "fwrite to file failed");
84
+ if (fwrite(vlt->off, sizeof(uint32_t), vlt->n, fp) != (unsigned int)vlt->n) return set_error(f, "fwrite to file failed");
85
+ if (fwrite(vlt->val, sizeof(uint32_t), vlt->n, fp) != (unsigned int)vlt->n) return set_error(f, "fwrite to file failed");
86
+ if (fwrite(vlt->txtbuf, txtlen, 1, fp) != 1) return set_error(f, "fwrite to file failed");
87
+ }
88
+
89
+ fclose(fp);
90
+ return 0;
91
+ }
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
@@ -0,0 +1,10 @@
1
+
2
#ifndef STATA_WRITE_H
#define STATA_WRITE_H

#include <inttypes.h>

/* Defined in Stata.h; only a pointer to it is needed here. */
struct stata_file;

/*
 * Write `file` to the path `filename`.
 * NOTE(review): the implementation appears to return 0 in every case and to
 * report failures through file->error; confirm against Write.c.
 */
extern int write_stata_file(char * filename, struct stata_file * file);

#endif /* STATA_WRITE_H */
Binary file
@@ -0,0 +1,259 @@
1
+
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+ #include <math.h>
5
+ #include <ruby.h>
6
+ #include "Stata.h"
7
+ #include "Write.h"
8
+
9
+ #ifndef RUBY_19
10
+ #ifndef RFLOAT_VALUE
11
+ #define RFLOAT_VALUE(v) (RFLOAT(v)->value)
12
+ #endif
13
+ #ifndef RARRAY_LEN
14
+ #define RARRAY_LEN(v) (RARRAY(v)->len)
15
+ #endif
16
+ #ifndef RARRAY_PTR
17
+ #define RARRAY_PTR(v) (RARRAY(v)->ptr)
18
+ #endif
19
+ #endif
20
+
21
+ int populate_fields_from_ruby_index = 0;
22
+ VALUE populate_fields_from_ruby(VALUE field, struct stata_file * f)
23
+ {
24
+ VALUE v;
25
+
26
+ v = rb_hash_aref(field, rb_str_new2("type"));
27
+ if (TYPE(v) != T_FIXNUM) rb_raise(rb_eArgError, "field type is not provided or is not a Fixnum");
28
+ f->typlist[populate_fields_from_ruby_index] = NUM2INT(v);
29
+
30
+ v = rb_hash_aref(field, rb_str_new2("name"));
31
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "field name is not provided or is not a Fixnum");
32
+ f->varlist[populate_fields_from_ruby_index] = (char*)malloc(33);
33
+ strncpy(f->varlist[populate_fields_from_ruby_index], rb_string_value_cstr(&v), 33);
34
+
35
+ v = rb_hash_aref(field, rb_str_new2("sort"));
36
+ if (TYPE(v) != T_FIXNUM) rb_raise(rb_eArgError, "field sort is not provided or is not a Fixnum");
37
+ f->srtlist[populate_fields_from_ruby_index] = NUM2INT(v);
38
+
39
+ v = rb_hash_aref(field, rb_str_new2("format"));
40
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "field format is not provided or is not a Fixnum");
41
+ f->fmtlist[populate_fields_from_ruby_index] = (char*)malloc(49);
42
+ strncpy(f->fmtlist[populate_fields_from_ruby_index], rb_string_value_cstr(&v), 49);
43
+
44
+ v = rb_hash_aref(field, rb_str_new2("value_label"));
45
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "field value_label is not provided or is not a String");
46
+ f->lbllist[populate_fields_from_ruby_index] = (char*)malloc(33);
47
+ strncpy(f->lbllist[populate_fields_from_ruby_index], rb_string_value_cstr(&v), 33);
48
+
49
+ v = rb_hash_aref(field, rb_str_new2("variable_label"));
50
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "field variable_label is not provided or is not a String");
51
+ f->variable_labels[populate_fields_from_ruby_index] = (char*)malloc(81);
52
+ strncpy(f->variable_labels[populate_fields_from_ruby_index], rb_string_value_cstr(&v), 81);
53
+
54
+ populate_fields_from_ruby_index++;
55
+ }
56
+
57
+ int populate_data_from_ruby_index = 0;
58
+ VALUE populate_data_from_ruby(VALUE row, struct stata_file * f)
59
+ {
60
+ VALUE v;
61
+ int j = 0;//, i = populate_data_from_ruby_index;
62
+ for (j = 0 ; j < f->nvar ; j++)
63
+ {
64
+ v = rb_ary_entry(row, j);
65
+ struct stata_var * var = &f->obs[populate_data_from_ruby_index].var[j];
66
+
67
+ if (f->typlist[j] == 255) var->v_type = V_DOUBLE;
68
+ else if (f->typlist[j] == 254) var->v_type = V_FLOAT;
69
+ else if (f->typlist[j] == 253) var->v_type = V_LONG;
70
+ else if (f->typlist[j] == 252) var->v_type = V_INT;
71
+ else if (f->typlist[j] == 251) var->v_type = V_BYTE;
72
+ else if (f->typlist[j] <= 244) var->v_type = f->typlist[j];
73
+ else rb_raise(rb_eArgError, "type is %d", f->typlist[j]);
74
+
75
+ switch (TYPE(v)) {
76
+ case T_SYMBOL:
77
+ v = rb_str_new2(rb_id2name(SYM2ID(v)));
78
+ const char * symbol_name = RSTRING_PTR(v);
79
+
80
+ int dot = 0;
81
+ if (strlen(symbol_name) == 5) dot = symbol_name[4] - 96;
82
+ if (dot < 0 || dot > 26) { rb_raise(rb_eArgError, "INVALID SYMBOL '%s'", symbol_name); continue; }
83
+
84
+ if (f->typlist[j] == 255) var->v_double = pow(2, 1023) + dot*pow(2, 1011);
85
+ else if (f->typlist[j] == 254) var->v_float = (float)pow(2, 127) + dot*(float)pow(2, 115);
86
+ else if (f->typlist[j] == 253) var->v_long = 2147483621 + dot;
87
+ else if (f->typlist[j] == 252) var->v_int = 32741 + dot;
88
+ else if (f->typlist[j] == 251) var->v_byte = 101 + dot;
89
+ else rb_raise(rb_eArgError, "invalid typlist '%d' %d", f->typlist[j], TYPE(v));
90
+ break;
91
+ case T_BIGNUM:
92
+ case T_FIXNUM:
93
+ case T_FLOAT:
94
+ if (f->typlist[j] == 255) var->v_double = rb_num2dbl(v);
95
+ else if (f->typlist[j] == 254) var->v_float = (float)rb_num2dbl(v);
96
+ else if (f->typlist[j] == 253) var->v_long = (int32_t)FIX2LONG(v);
97
+ else if (f->typlist[j] == 252) var->v_int = FIX2LONG(v);
98
+ else if (f->typlist[j] == 251) var->v_byte = FIX2LONG(v);
99
+ else rb_raise(rb_eArgError, "invalid typlist '%d' %d %f", f->typlist[j], TYPE(v), RFLOAT_VALUE(v));
100
+ break;
101
+ case T_STRING:
102
+ var->v_type = f->typlist[j];
103
+ var->v_str = (char*)malloc(f->typlist[j]+1);
104
+ strncpy(var->v_str, RSTRING_PTR(v), f->typlist[j]+1);
105
+ break;
106
+ case T_NIL:
107
+ rb_raise(rb_eArgError, "nil value submitted");
108
+ break;
109
+ default:
110
+ rb_raise(rb_eArgError, "unsupported ruby type: %d", TYPE(v));
111
+ break;
112
+ }
113
+ }
114
+ populate_data_from_ruby_index++;
115
+ }
116
+
117
+ int populate_value_labels_from_ruby_index = 0;
118
+ VALUE populate_value_labels_from_ruby(VALUE r_vlt, struct stata_file * f)
119
+ {
120
+ VALUE v;
121
+
122
+ f->num_vlt++;
123
+ f->vlt = (struct stata_vlt *)realloc(f->vlt, sizeof(struct stata_vlt)*f->num_vlt);
124
+ struct stata_vlt * vlt = &f->vlt[f->num_vlt-1];
125
+
126
+ if (TYPE(r_vlt) != T_HASH) rb_raise(rb_eArgError, "Value label table should be a Hash");
127
+
128
+ v = rb_hash_aref(r_vlt, rb_str_new2("name"));
129
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "Value label table name isn't provided or isn't a String");
130
+ strncpy(vlt->name, RSTRING_PTR(v), 33);
131
+
132
+ v = rb_hash_aref(r_vlt, rb_str_new2("table"));
133
+ if (TYPE(v) != T_ARRAY) rb_raise(rb_eArgError, "Value label table name isn't provided or isn't an Array");
134
+
135
+ vlt->n = (int32_t)RARRAY_LEN(v);
136
+ vlt->txtlen = 0;
137
+ vlt->off = (uint32_t*)malloc(sizeof(uint32_t)*vlt->n);
138
+ vlt->val = (uint32_t*)malloc(sizeof(uint32_t)*vlt->n);
139
+ vlt->txtbuf = NULL;
140
+
141
+ int i;
142
+ for (i = 0 ; i < RARRAY_LEN(v) ; i++)
143
+ {
144
+ VALUE r = rb_ary_entry(v, i);
145
+ if (TYPE(r) != T_ARRAY) rb_raise(rb_eArgError, "value label contains a row which isn't an Array");
146
+ if (TYPE(rb_ary_entry(r, 0)) != T_FIXNUM) rb_raise(rb_eArgError, "value label contains a value which isn't a Fixnum");
147
+ if (TYPE(rb_ary_entry(r, 1)) != T_STRING) rb_raise(rb_eArgError, "value label contains a label which isn't a String");
148
+ char * txt = RSTRING_PTR(rb_ary_entry(r, 1));
149
+ vlt->txtlen += (int32_t)strlen(txt)+1;
150
+ }
151
+ vlt->txtbuf = (char*)malloc(vlt->txtlen);
152
+
153
+ vlt->txtlen = 0;
154
+ for (i = 0 ; i < RARRAY_LEN(v) ; i++)
155
+ {
156
+ VALUE r = rb_ary_entry(v, i);
157
+ vlt->val[i] = NUM2INT(rb_ary_entry(r, 0));
158
+ char * txt = RSTRING_PTR(rb_ary_entry(r, 1));
159
+ vlt->txtlen += (int32_t)strlen(txt)+1;
160
+
161
+ vlt->off[i] = vlt->txtlen-((int32_t)strlen(txt)+1);
162
+ memcpy(vlt->txtbuf+vlt->off[i], txt, strlen(txt)+1);
163
+ }
164
+
165
+ populate_value_labels_from_ruby_index++;
166
+ }
167
+
168
+ VALUE method_write(VALUE self, VALUE filename, VALUE data)
169
+ {
170
+ VALUE v;
171
+ if (TYPE(data) != T_HASH) rb_raise(rb_eArgError, "Content to be written should be a hash");
172
+ if (TYPE(filename) != T_STRING) rb_raise(rb_eArgError, "Filename for writing is not a string");
173
+
174
+ if (rb_hash_aref(data, rb_str_new2("nvar")) == Qnil) rb_raise(rb_eArgError, "nvar is required");
175
+ if (rb_hash_aref(data, rb_str_new2("nobs")) == Qnil) rb_raise(rb_eArgError, "nobs is required");
176
+ if (rb_hash_aref(data, rb_str_new2("fields")) == Qnil) rb_raise(rb_eArgError, "no fields provided");
177
+ if (rb_hash_aref(data, rb_str_new2("data")) == Qnil) rb_raise(rb_eArgError, "no data provided");
178
+
179
+ struct stata_file * f = (struct stata_file *)malloc(sizeof(struct stata_file));
180
+ if (f == NULL) rb_raise(rb_eArgError, "Could not allocate memory for the stata file");
181
+ memset(f, 0, sizeof(struct stata_file));
182
+
183
+
184
+ /* 5.1 Headers */
185
+ v = rb_hash_aref(data, rb_str_new2("nvar"));
186
+ if (TYPE(v) != T_FIXNUM) rb_raise(rb_eArgError, "nvar is not provided or is not a fixnum");
187
+ f->nvar = NUM2UINT(v);
188
+
189
+ v = rb_hash_aref(data, rb_str_new2("nobs"));
190
+ if (TYPE(v) != T_FIXNUM) rb_raise(rb_eArgError, "nobs is not provided or is not a fixnum");
191
+ f->nobs = NUM2UINT(v);
192
+
193
+ v = rb_hash_aref(data, rb_str_new2("data_label"));
194
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "data_label is not provided or is not a string");
195
+ strncpy(f->data_label, rb_string_value_cstr(&v), sizeof(f->data_label));
196
+
197
+ v = rb_hash_aref(data, rb_str_new2("time_stamp"));
198
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "time_stamp is not provided or is not a string");
199
+ strncpy(f->time_stamp, rb_string_value_cstr(&v), sizeof(f->time_stamp));
200
+
201
+
202
+ /* 5.2 and 5.3, Descriptors and Variable Labels */
203
+ f->typlist = (uint8_t *)malloc(f->nvar);
204
+ if (f->typlist == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
205
+ f->varlist = (char **)malloc(sizeof(char *)*f->nvar);
206
+ if (f->varlist == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
207
+ f->srtlist = (uint16_t *)malloc(sizeof(uint16_t)*(f->nvar+1));
208
+ if (f->srtlist == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
209
+ f->fmtlist = (char **)malloc(sizeof(char *)*f->nvar);
210
+ if (f->fmtlist == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
211
+ f->lbllist = (char **)malloc(sizeof(char *)*f->nvar);
212
+ if (f->lbllist == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
213
+ f->variable_labels = (char **)malloc(sizeof(char *)*f->nvar);
214
+ if (f->variable_labels == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
215
+
216
+ v = rb_hash_aref(data, rb_str_new2("fields"));
217
+ if (TYPE(v) != T_ARRAY) rb_raise(rb_eArgError, "fields are not provided or are not an array");
218
+
219
+ populate_fields_from_ruby_index = 0;
220
+ rb_iterate(rb_each, v, populate_fields_from_ruby, (VALUE)f);
221
+
222
+
223
+ /* 5.3 Expansion Fields - nothing comes from ruby */
224
+
225
+
226
+ /* 5.4 Data */
227
+ long i, j;
228
+ f->obs = (struct stata_obs *)malloc(sizeof(struct stata_obs)*f->nobs);
229
+ for (j = 0 ; j < f->nobs ; j++)
230
+ {
231
+ f->obs[j].var = (struct stata_var *)malloc(sizeof(struct stata_var)*f->nvar);
232
+ for (i = 0 ; i < f->nvar ; i++)
233
+ {
234
+ struct stata_var * var = &f->obs[j].var[i];
235
+ memset(var, 0, sizeof(struct stata_var));
236
+ }
237
+ }
238
+ v = rb_hash_aref(data, rb_str_new2("data"));
239
+ if (TYPE(v) != T_ARRAY) rb_raise(rb_eArgError, "data is not provided or is not an array");
240
+
241
+ populate_data_from_ruby_index = 0;
242
+ rb_iterate(rb_each, v, populate_data_from_ruby, (VALUE)f);
243
+
244
+
245
+ /* 5.5 Value Label Tables */
246
+ v = rb_hash_aref(data, rb_str_new2("value_labels"));
247
+ if (TYPE(v) != T_ARRAY) rb_raise(rb_eArgError, "value labels are not provided or are not an array");
248
+
249
+ populate_value_labels_from_ruby_index = 0;
250
+ rb_iterate(rb_each, v, populate_value_labels_from_ruby, (VALUE)f);
251
+
252
+ write_stata_file(RSTRING_PTR(filename), f);
253
+
254
+ if (f->error) rb_raise(rb_eRuntimeError, "%s", f->error);
255
+
256
+ free_stata(f);
257
+
258
+ return INT2NUM(1);
259
+ }
Binary file
@@ -0,0 +1,11 @@
1
# Build configuration for the native extension.
# Running this script generates the Makefile that compiles it.
require 'mkmf'

# Name under which the extension is built and later required.
target = 'Stata'

# Let mkmf process any directory options given for this extension.
dir_config(target)

# Emit the Makefile.
create_makefile(target)
Binary file
@@ -0,0 +1,42 @@
1
#!/usr/bin/env ruby -w
#encoding:ASCII-8BIT

# Round-trip check for the Stata extension.
#
# Every input file is read, written back out as "<name>_resave<ext>", read
# again, and the 'data', 'fields' and 'value_labels' sections of both reads are
# compared. Files given on the command line are used; without arguments the
# script falls back to test_file_*.dta in the current directory.
#
# Exits with -1 when there is nothing to check and with 1 when any section
# differs after the round trip.

$LOAD_PATH << '.'
require 'Stata'

Stata.verbose = true

files = if ARGV.empty?
  Dir.glob("test_file_*.dta")
else
  ARGV
end

if files.empty?
  puts "usage: #{$0} file1.dta [file2.dta ...]"
  exit(-1)
end

mismatches = 0

files.each do |original|
  # files produced by an earlier run are outputs, not inputs
  if original =~ /_resave/
    puts "skipping #{original.inspect}\n\n"
    next
  end

  ext = File.extname(original)
  base = original[0..-(ext.length+1)]
  resave = "#{base}_resave#{ext}"

  s1 = Stata.read(original)
  Stata.write(resave, s1)

  s2 = Stata.read(resave)

  ['data', 'fields', 'value_labels'].each do |f|
    if (s1[f] != s2[f])
      # report the file being checked
      puts "ERROR '#{original}' #{f} did not read/write the same!"
      mismatches += 1
    end
  end

  puts "\n"
end

# make a failed round trip visible to whoever runs the script
exit(1) unless mismatches.zero?
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: stata
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Kevin Branigan
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-01-18 00:00:00 +00:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 2
30
+ - 4
31
+ version: "2.4"
32
+ type: :development
33
+ version_requirements: *id001
34
+ description:
35
+ email:
36
+ executables: []
37
+
38
+ extensions:
39
+ - ext/extconf.rb
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - ext/extconf.rb
44
+ - ext/Makefile
45
+ - ext/Makefile_c
46
+ - ext/Read.c
47
+ - ext/Read.h
48
+ - ext/Read.o
49
+ - ext/Read.rb.c
50
+ - ext/Read.rb.o
51
+ - ext/Stata.bundle
52
+ - ext/Stata.c
53
+ - ext/Stata.h
54
+ - ext/Stata.o
55
+ - ext/Stata.rb.c
56
+ - ext/Stata.rb.o
57
+ - ext/stata_c_test
58
+ - ext/test.rb
59
+ - ext/Write.c
60
+ - ext/Write.h
61
+ - ext/Write.o
62
+ - ext/Write.rb.c
63
+ - ext/Write.rb.o
64
+ - LICENSE
65
+ - README
66
+ has_rdoc: true
67
+ homepage: http://github.com/unspace/stata
68
+ licenses:
69
+ - MIT
70
+ post_install_message:
71
+ rdoc_options: []
72
+
73
+ require_paths:
74
+ - ext
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ segments:
81
+ - 0
82
+ version: "0"
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ segments:
89
+ - 0
90
+ version: "0"
91
+ requirements: []
92
+
93
+ rubyforge_project:
94
+ rubygems_version: 1.3.7
95
+ signing_key:
96
+ specification_version: 3
97
+ summary: Read and write support for the Stata binary format
98
+ test_files: []
99
+