stata 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (C) 2011 by Unspace Interactive
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
20
+
data/README ADDED
@@ -0,0 +1,21 @@
1
+
2
+ to make the test C app, run:
3
+
4
+ cd ext
5
+ make -f Makefile_c -B
6
+ ./stata_c_test file
7
+
8
+ this will compile the application, read file.dta and write a copy of the same data to file_resave.dta (the program appends the .dta extension itself)
9
+
10
+
11
+ to make the test Ruby app, run:
12
+
13
+ cd ext
14
+ ruby extconf.rb
15
+ make
16
+ ./test.rb file.dta
17
+
18
+ this will do the same, but using the Ruby extension
19
+
20
+
21
+ If the app finds any issues with the data, it will fail an assert, which will give you line numbers to the source.
@@ -0,0 +1,187 @@
1
+ # NOTE(review): this looks like mkmf output (the README builds it with `ruby extconf.rb`); the absolute paths below are specific to the machine it was generated on, so regenerate it rather than editing it by hand. TODO confirm.
2
+ SHELL = /bin/sh
3
+
4
+ #### Start of system configuration section. ####
5
+
6
+ srcdir = .
7
+ topdir = /Users/aanand/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1
8
+ hdrdir = /Users/aanand/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1
9
+ arch_hdrdir = /Users/aanand/.rvm/rubies/ruby-1.9.2-p136/include/ruby-1.9.1/$(arch)
10
+ VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
11
+ prefix = $(DESTDIR)/Users/aanand/.rvm/rubies/ruby-1.9.2-p136
12
+ rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
13
+ exec_prefix = $(prefix)
14
+ vendorhdrdir = $(rubyhdrdir)/vendor_ruby
15
+ sitehdrdir = $(rubyhdrdir)/site_ruby
16
+ rubyhdrdir = $(includedir)/$(RUBY_BASE_NAME)-$(ruby_version)
17
+ vendordir = $(rubylibprefix)/vendor_ruby
18
+ sitedir = $(rubylibprefix)/site_ruby
19
+ ridir = $(datarootdir)/$(RI_BASE_NAME)
20
+ mandir = $(datarootdir)/man
21
+ localedir = $(datarootdir)/locale
22
+ libdir = $(exec_prefix)/lib
23
+ psdir = $(docdir)
24
+ pdfdir = $(docdir)
25
+ dvidir = $(docdir)
26
+ htmldir = $(docdir)
27
+ infodir = $(datarootdir)/info
28
+ docdir = $(datarootdir)/doc/$(PACKAGE)
29
+ oldincludedir = $(DESTDIR)/usr/include
30
+ includedir = $(prefix)/include
31
+ localstatedir = $(prefix)/var
32
+ sharedstatedir = $(prefix)/com
33
+ sysconfdir = $(prefix)/etc
34
+ datadir = $(datarootdir)
35
+ datarootdir = $(prefix)/share
36
+ libexecdir = $(exec_prefix)/libexec
37
+ sbindir = $(exec_prefix)/sbin
38
+ bindir = $(exec_prefix)/bin
39
+ rubylibdir = $(rubylibprefix)/$(ruby_version)
40
+ archdir = $(rubylibdir)/$(arch)
41
+ sitelibdir = $(sitedir)/$(ruby_version)
42
+ sitearchdir = $(sitelibdir)/$(sitearch)
43
+ vendorlibdir = $(vendordir)/$(ruby_version)
44
+ vendorarchdir = $(vendorlibdir)/$(sitearch)
45
+
46
+ CC = gcc
47
+ CXX = g++
48
+ LIBRUBY = $(LIBRUBY_SO)
49
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
50
+ LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
51
+ LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
52
+ OUTFLAG = -o
53
+ COUTFLAG = -o
54
+
55
+ RUBY_EXTCONF_H =
56
+ cflags = $(optflags) $(debugflags) $(warnflags)
57
+ optflags = -O3
58
+ debugflags = -ggdb
59
+ warnflags = -Wextra -Wno-unused-parameter -Wno-parentheses -Wpointer-arith -Wwrite-strings -Wno-missing-field-initializers -Wshorten-64-to-32 -Wno-long-long
60
+ CFLAGS = -fno-common -isysroot /Developer/SDKs/MacOSX10.6.sdk -arch i386 -fno-common -pipe
61
+ INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
62
+ DEFS =
63
+ CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags)
64
+ CXXFLAGS = $(CFLAGS) $(cxxflags)
65
+ ldflags = -L. -Wl,-syslibroot /Developer/SDKs/MacOSX10.6.sdk -arch i386 -L/usr/local/lib
66
+ dldflags = -Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress -Wl,-flat_namespace
67
+ ARCH_FLAG =
68
+ DLDFLAGS = $(ldflags) $(dldflags)
69
+ LDSHARED = $(CC) -dynamic -bundle
70
+ LDSHAREDXX = $(CXX) -dynamic -bundle
71
+ AR = ar
72
+ EXEEXT =
73
+
74
+ RUBY_BASE_NAME = ruby
75
+ RUBY_INSTALL_NAME = ruby
76
+ RUBY_SO_NAME = ruby.1.9.1
77
+ arch = i386-darwin10.5.0
78
+ sitearch = $(arch)
79
+ ruby_version = 1.9.1
80
+ ruby = /Users/aanand/.rvm/rubies/ruby-1.9.2-p136/bin/ruby
81
+ RUBY = $(ruby)
82
+ RM = rm -f
83
+ RM_RF = $(RUBY) -run -e rm -- -rf
84
+ RMDIRS = $(RUBY) -run -e rmdir -- -p
85
+ MAKEDIRS = mkdir -p
86
+ INSTALL = /usr/bin/install -c
87
+ INSTALL_PROG = $(INSTALL) -m 0755
88
+ INSTALL_DATA = $(INSTALL) -m 644
89
+ COPY = cp
90
+
91
+ #### End of system configuration section. ####
92
+
93
+ preload =
94
+
95
+ libpath = . $(libdir)
96
+ LIBPATH = -L. -L$(libdir)
97
+ DEFFILE =
98
+
99
+ CLEANFILES = mkmf.log
100
+ DISTCLEANFILES =
101
+ DISTCLEANDIRS =
102
+
103
+ extout =
104
+ extout_prefix =
105
+ target_prefix =
106
+ LOCAL_LIBS =
107
+ LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lobjc
108
+ SRCS = Read.c Read.rb.c Stata.c Stata.rb.c Write.c Write.rb.c
109
+ OBJS = Read.o Read.rb.o Stata.o Stata.rb.o Write.o Write.rb.o
110
+ TARGET = Stata
111
+ DLLIB = $(TARGET).bundle
112
+ EXTSTATIC =
113
+ STATIC_LIB =
114
+
115
+ BINDIR = $(bindir)
116
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
117
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
118
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
119
+ HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
120
+ ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
121
+
122
+ TARGET_SO = $(DLLIB)
123
+ CLEANLIBS = $(TARGET).bundle
124
+ CLEANOBJS = *.o *.bak
125
+
126
+ all: $(DLLIB)
127
+ static: $(STATIC_LIB)
128
+ .PHONY: all install static install-so install-rb
129
+ .PHONY: clean clean-so clean-rb
130
+
131
+ clean-rb-default::
132
+ clean-rb::
133
+ clean-so::
134
+ clean: clean-so clean-rb-default clean-rb
135
+ @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
136
+
137
+ distclean-rb-default::
138
+ distclean-rb::
139
+ distclean-so::
140
+ distclean: clean distclean-so distclean-rb-default distclean-rb
141
+ @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
142
+ @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
143
+ @-$(RMDIRS) $(DISTCLEANDIRS)
144
+
145
+ realclean: distclean
146
+ install: install-so install-rb
147
+
148
+ install-so: $(RUBYARCHDIR)
149
+ install-so: $(RUBYARCHDIR)/$(DLLIB)
150
+ $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
151
+ @-$(MAKEDIRS) $(@D)
152
+ $(INSTALL_PROG) $(DLLIB) $(@D)
153
+ install-rb: pre-install-rb install-rb-default
154
+ install-rb-default: pre-install-rb-default
155
+ pre-install-rb: Makefile
156
+ pre-install-rb-default: Makefile
157
+ $(RUBYARCHDIR):
158
+ $(MAKEDIRS) $@
159
+
160
+ site-install: site-install-so site-install-rb
161
+ site-install-so: install-so
162
+ site-install-rb: install-rb
163
+
164
+ .SUFFIXES: .c .m .cc .cxx .cpp .C .o
165
+
166
+ .cc.o:
167
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
168
+
169
+ .cxx.o:
170
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
171
+
172
+ .cpp.o:
173
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
174
+
175
+ .C.o:
176
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
177
+
178
+ .c.o:
179
+ $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
180
+
181
+ $(DLLIB): $(OBJS) Makefile
182
+ @-$(RM) $(@)
183
+ $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
184
+
185
+
186
+
187
+ $(OBJS): $(hdrdir)/ruby.h $(hdrdir)/ruby/defines.h $(arch_hdrdir)/ruby/config.h
@@ -0,0 +1,9 @@
1
+ # Builds the stand-alone C test program stata_c_test: make -f Makefile_c
2
+ OBJS = Read.o Write.o Stata.o
3
+ STD =
4
+ .PHONY: all
5
+ %.o: %.c
6
+ 	$(CC) $(STD) -Wall -c $< -o $@
7
+ all: stata_c_test
8
+ stata_c_test: $(OBJS)
9
+ 	$(CC) $(STD) $(OBJS) -Wall -o $@
@@ -0,0 +1,135 @@
1
+
2
+ #include "Read.h"
3
+ #include "Stata.h"
4
+ #include <math.h>
5
+ #include <stdio.h>
6
+ #include <stdlib.h>
7
+ #include <string.h>
8
+
9
+ int16_t read_int16_t() { uint16_t t=0; fread(&t, sizeof(t), 1, fp); if (swap_endian_needed==1) t = (uint16_t)((t>>8) | (t<<8)); return (int16_t)t; } /* reads a signed 16-bit value from fp; the swap is done on an unsigned copy because >> on a negative int16_t sign-extends and would set every high bit of the result */
10
+ int32_t read_int32_t() { int32_t raw = 0; fread(&raw, sizeof(raw), 1, fp); if (swap_endian_needed != 1) return raw; return (int32_t)__builtin_bswap32(raw); } /* reads one signed 32-bit value from fp, byte-swapped when swap_endian_needed is 1 */
11
+
12
+ uint16_t read_uint16_t() { uint16_t raw = 0; fread(&raw, sizeof(raw), 1, fp); if (swap_endian_needed == 1) return (raw>>8)|(raw<<8); return raw; } /* reads one unsigned 16-bit value from fp, with its two bytes exchanged when swap_endian_needed is 1 */
13
+ uint32_t read_uint32_t() { uint32_t raw = 0; fread(&raw, sizeof(raw), 1, fp); if (swap_endian_needed != 1) return raw; return __builtin_bswap32(raw); } /* reads one unsigned 32-bit value from fp, byte-swapped when swap_endian_needed is 1 */
14
+
15
+ char * read_string(int length) { char * t; if (length < 0) return NULL; t = (char*)calloc((size_t)length+1, 1); if (t != NULL && length > 0) fread(t, 1, (size_t)length, fp); return t; } /* returns a NUL-terminated heap copy of the next `length` bytes of fp, or NULL when length is negative or allocation fails; calloc keeps the tail zeroed if the read comes up short */
16
+ char ** read_strings(int num, int length) { char ** t; int i; if (num < 0) return NULL; t = (char **)calloc((size_t)num, sizeof(char *)); if (t == NULL) return NULL; for (i = 0 ; i < num ; i++) t[i] = read_string(length); return t; } /* reads `num` consecutive fixed-width strings via read_string(); returns NULL instead of dereferencing a failed allocation */
17
+
18
+ float read_float_t() { uint32_t t=0; float v; fread(&t, sizeof(t), 1, fp); if (swap_endian_needed==1) t = __builtin_bswap32(t); memcpy(&v, &t, sizeof(v)); return v; } /* reads a 4-byte float from fp; memcpy replaces the pointer cast because reading a uint32_t object through a float lvalue violates C's aliasing rules */
19
+ double read_double_t() { uint64_t t=0; double v; fread(&t, sizeof(t), 1, fp); if (swap_endian_needed==1) t = __builtin_bswap64(t); memcpy(&v, &t, sizeof(v)); return v; } /* reads an 8-byte double from fp; memcpy replaces the pointer cast because reading a uint64_t object through a double lvalue violates C's aliasing rules */
20
+
21
+ struct stata_file * read_stata_file(char * filename)
22
+ {
23
+ if (verbose) printf("read file '%s'\n", filename);
24
+
25
+ long i,j;
26
+
27
+ struct stata_file * f = (struct stata_file *)malloc(sizeof(struct stata_file));
28
+ memset(f, 0, sizeof(struct stata_file));
29
+ f->filename = (char*)malloc(strlen(filename)+1);
30
+ strcpy(f->filename, filename);
31
+
32
+ fp = fopen(f->filename, "rb");
33
+ if (fp == NULL) { set_error(f, "error reading file"); return f; }
34
+
35
+ /*fseek(fp, 0 , SEEK_END);
36
+ long lSize = ftell(fp);
37
+ rewind(fp);
38
+ printf("file is %ld bytes long\n", lSize);*/
39
+
40
+
41
+ /* 5.1 Header */
42
+ f->ds_format = fgetc(fp); if (f->ds_format != 0x72) { set_error(f, "invalid file ds_format"); return f; }
43
+ f->byteorder = fgetc(fp); if (f->byteorder != 0x01 && f->byteorder != 0x02) { set_error(f, "invalid file byteorder"); return f; }
44
+ if (f->byteorder != get_host_endian()) swap_endian_needed = 1;
45
+ f->filetype = fgetc(fp); if (f->filetype != 0x01) { set_error(f, "invalid file filetype"); return f; }
46
+ f->unused = fgetc(fp); if (f->unused != 0x00) { set_error(f, "invalid unused values"); return f; }
47
+ f->nvar = read_uint16_t(); if (f->nvar <= 0) { set_error(f, "invalid nvar (< 1)"); return f; }
48
+ f->nobs = read_uint32_t(); if (f->nobs <= 0) { set_error(f, "invalid nobs (< 1)"); return f; }
49
+ fread(&f->data_label, sizeof(f->data_label), 1, fp);
50
+ fread(&f->time_stamp, sizeof(f->time_stamp), 1, fp);
51
+
52
+
53
+ /* 5.2 Descriptors */
54
+ f->typlist = (uint8_t *)malloc(f->nvar);
55
+ fread(f->typlist, 1, f->nvar, fp);
56
+ f->varlist = read_strings(f->nvar, 33);
57
+ f->srtlist = (uint16_t *)malloc(sizeof(uint16_t)*(f->nvar+1));
58
+ for (i = 0 ; i <= f->nvar ; i++) f->srtlist[i] = read_uint16_t();
59
+ f->fmtlist = read_strings(f->nvar, 49);
60
+ f->lbllist = read_strings(f->nvar, 33);
61
+
62
+
63
+ /* 5.3 Variable Labels */
64
+ f->variable_labels = read_strings(f->nvar, 81);
65
+
66
+
67
+ /* 5.4 Expansion Fields */
68
+ uint8_t data_type;
69
+ uint32_t len;
70
+ do {
71
+ data_type = fgetc(fp);
72
+ if (fread(&len, 4, 1, fp) != 1) { set_error(f, "fread from file failed"); return f; }
73
+ if (len > 0) for (i = 0 ; i < len ; i++) fgetc(fp);
74
+ } while(data_type != 0 || len != 0 || feof(fp));
75
+ /*printf ("read %d bytes of expansion fields\n", count);*/
76
+
77
+
78
+ /* 5.5 Data */
79
+ /*printf(" read 5.5 Data (%dx%d)\n", f->nobs, f->nvar);*/
80
+ f->obs = (struct stata_obs *)malloc(sizeof(struct stata_obs)*f->nobs);
81
+ for (j = 0 ; j < f->nobs ; j++)
82
+ {
83
+ f->obs[j].var = (struct stata_var *)malloc(sizeof(struct stata_var)*f->nvar);
84
+ for (i = 0 ; i < f->nvar ; i++)
85
+ {
86
+ struct stata_var * var = &f->obs[j].var[i];
87
+ memset(var, 0, sizeof(struct stata_var));
88
+
89
+ if (f->typlist[i] != 0 &&
90
+ f->typlist[i] < 245) { var->v_type = V_STR; var->v_str = read_string(f->typlist[i]); if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
91
+ else if (f->typlist[i] == 251) { var->v_type = V_BYTE; if (fread(&var->v_byte, sizeof(var->v_byte), 1, fp) != 1) { set_error(f, "fread from file failed"); }; if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
92
+ else if (f->typlist[i] == 252) { var->v_type = V_INT; var->v_int = read_int16_t(); if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
93
+ else if (f->typlist[i] == 253) { var->v_type = V_LONG; var->v_long = read_int32_t(); if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
94
+ else if (f->typlist[i] == 254) { var->v_type = V_FLOAT; var->v_float = read_float_t(); if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
95
+ else if (f->typlist[i] == 255) { var->v_type = V_DOUBLE; var->v_double = read_double_t(); if (feof(fp)) { set_error(f, "end of file reached too soon"); return f; }; }
96
+ else fprintf(stderr, "error.\n");
97
+
98
+ if (ferror(fp)) perror("error occurred");
99
+ if (feof(fp)) { fprintf(stderr, "eof error at var %ld (error:%d)\n", i, ferror(fp)); break; }
100
+ }
101
+ if (feof(fp)) { fprintf(stderr, "eof error at obs %ld (error:%d)\n", j, ferror(fp)); exit(1); }
102
+ }
103
+
104
+
105
+ /* 5.6 Value labels */
106
+ if (!feof(fp))
107
+ {
108
+ while (!feof(fp))
109
+ {
110
+ int32_t len = read_int32_t();
111
+ if (feof(fp) || len == 0) break;
112
+
113
+ f->num_vlt++;
114
+ f->vlt = (struct stata_vlt *)realloc(f->vlt, sizeof(struct stata_vlt)*f->num_vlt);
115
+ struct stata_vlt * vlt = &f->vlt[f->num_vlt-1];
116
+
117
+ if (fread(vlt->name, 33, 1, fp) != 1) { set_error(f, "fread from file failed"); return f; };
118
+ fgetc(fp); fgetc(fp); fgetc(fp); /* padding */
119
+
120
+ vlt->n = read_int32_t();
121
+ vlt->txtlen = read_int32_t(); if (vlt->txtlen >= 32000) { set_error(f, "value label table txtlen is > 32000"); return f; };
122
+ vlt->off = (int32_t*)malloc(sizeof(int32_t)*vlt->n);
123
+ for (i = 0 ; i < vlt->n ; i++) vlt->off[i] = read_int32_t();
124
+ vlt->val = (int32_t*)malloc(sizeof(int32_t)*vlt->n);
125
+ for (i = 0 ; i < vlt->n ; i++) vlt->val[i] = read_int32_t();
126
+ vlt->txtbuf = (char*)malloc(vlt->txtlen);
127
+ fread(vlt->txtbuf, vlt->txtlen, 1, fp);
128
+ }
129
+ }
130
+ /*printf(" read 5.6 Value labels (%d)\n", f->num_vlt);*/
131
+
132
+
133
+ fclose(fp);
134
+ return f;
135
+ }
@@ -0,0 +1,23 @@
1
+
2
+ #ifndef STATA_READ_H
3
+ #define STATA_READ_H
4
+
5
+ #include <inttypes.h>
6
+
7
+ extern int16_t read_int16_t();
8
+ extern int32_t read_int32_t();
9
+
10
+ extern uint16_t read_uint16_t();
11
+ extern uint32_t read_uint32_t();
12
+ extern uint64_t read_uint64_t();
13
+
14
+ extern char * read_string(int length);
15
+ extern char ** read_strings(int num, int length);
16
+
17
+ extern float read_float_t();
18
+ extern double read_double_t();
19
+
20
+ struct stata_file;
21
+ extern struct stata_file * read_stata_file(char * filename);
22
+
23
+ #endif
Binary file
@@ -0,0 +1,168 @@
1
+
2
+ #include <math.h>
3
+ #include <ruby.h>
4
+ #include "Stata.h"
5
+ #include "Read.h"
6
+
7
+ #ifdef HAVE_RUBY_ENCODING_H
8
+ # include <ruby/encoding.h>
9
+ # define ENCODED_STR_NEW2(str, encoding) \
10
+ ({ \
11
+ VALUE _string = rb_str_new2((const char *)str); \
12
+ int _enc = rb_enc_find_index(encoding); \
13
+ rb_enc_associate_index(_string, _enc); \
14
+ _string; \
15
+ })
16
+ #else
17
+ # define ENCODED_STR_NEW2(str, encoding) \
18
+ rb_str_new2((const char *)str)
19
+ #endif
20
+
21
+ VALUE method_read(VALUE self, VALUE file)
22
+ {
23
+ long i,j;
24
+
25
+ if (TYPE(file) != T_STRING) rb_raise(rb_eArgError, "filename is not a string, but instead of type '%d' (in C)", TYPE(file));
26
+
27
+ struct stata_file * f = read_stata_file(rb_string_value_cstr(&file));
28
+ if (f == NULL) rb_raise(rb_eRuntimeError, "Read Error");
29
+ if (f->error) rb_raise(rb_eRuntimeError, "%s", f->error);
30
+
31
+
32
+ /* 5.1 Headers */
33
+ VALUE r = rb_hash_new();
34
+ rb_hash_aset(r, ENCODED_STR_NEW2("file_name", "ASCII-8BIT"), ENCODED_STR_NEW2(f->filename, "ASCII-8BIT"));
35
+ rb_hash_aset(r, ENCODED_STR_NEW2("data_label", "ASCII-8BIT"), ENCODED_STR_NEW2(f->data_label, "ASCII-8BIT"));
36
+ rb_hash_aset(r, ENCODED_STR_NEW2("time_stamp", "ASCII-8BIT"), ENCODED_STR_NEW2(f->time_stamp, "ASCII-8BIT"));
37
+ rb_hash_aset(r, ENCODED_STR_NEW2("nvar", "ASCII-8BIT"), INT2NUM(f->nvar));
38
+ rb_hash_aset(r, ENCODED_STR_NEW2("nobs", "ASCII-8BIT"), INT2NUM(f->nobs));
39
+
40
+ VALUE data = rb_ary_new();
41
+ for (i = 0 ; i < f->nobs ; i++)
42
+ {
43
+ VALUE row = rb_ary_new();
44
+ for (j = 0 ; j < f->nvar ; j++)
45
+ {
46
+ char symbol_name[100];
47
+ sprintf(symbol_name, "%d", f->obs[i].var[j].v_type);
48
+ VALUE var = Qnil;
49
+ sprintf(symbol_name, "dot_");
50
+
51
+ if (f->obs[i].var[j].v_type == V_STR && f->obs[i].var[j].v_str != NULL)
52
+ var = ENCODED_STR_NEW2(f->obs[i].var[j].v_str, "ASCII-8BIT");
53
+ else if (f->obs[i].var[j].v_type == V_BYTE)
54
+ {
55
+ if (f->obs[i].var[j].v_byte >= -127 && f->obs[i].var[j].v_byte <= 100)
56
+ var = INT2NUM((int)f->obs[i].var[j].v_byte);
57
+ else if (f->obs[i].var[j].v_byte > 100)
58
+ {
59
+ int dot = f->obs[i].var[j].v_byte - 101;
60
+ if (dot == 0) symbol_name[3] = 0;
61
+ else symbol_name[4] = dot+96;
62
+ symbol_name[5] = 0;
63
+ var = rb_str_intern(ENCODED_STR_NEW2(symbol_name, "ASCII-8BIT"));
64
+ }
65
+ }
66
+ else if (f->obs[i].var[j].v_type == V_INT)
67
+ {
68
+ if (f->obs[i].var[j].v_int >= -32767 && f->obs[i].var[j].v_int <= 32740)
69
+ var = INT2NUM((int)f->obs[i].var[j].v_int);
70
+ else if (f->obs[i].var[j].v_int > 32740)
71
+ {
72
+ int dot = f->obs[i].var[j].v_int - 32741;
73
+ if (dot == 0) symbol_name[3] = 0;
74
+ else symbol_name[4] = dot+96;
75
+ symbol_name[5] = 0;
76
+ var = rb_str_intern(ENCODED_STR_NEW2(symbol_name, "ASCII-8BIT"));
77
+ }
78
+ }
79
+ else if (f->obs[i].var[j].v_type == V_LONG)
80
+ {
81
+ if (f->obs[i].var[j].v_long >= -2147483647 && f->obs[i].var[j].v_long <= 2147483620)
82
+ var = LONG2NUM((int)f->obs[i].var[j].v_long);
83
+ else if (f->obs[i].var[j].v_long > 2147483620)
84
+ {
85
+ int dot = f->obs[i].var[j].v_long - 2147483621;
86
+ if (dot == 0) symbol_name[3] = 0;
87
+ else symbol_name[4] = dot+96;
88
+ symbol_name[5] = 0;
89
+ var = rb_str_intern(ENCODED_STR_NEW2(symbol_name, "ASCII-8BIT"));
90
+ }
91
+ }
92
+ else if (f->obs[i].var[j].v_type == V_FLOAT)
93
+ {
94
+ if (f->obs[i].var[j].v_float < pow(2, 127))
95
+ {
96
+ var = rb_float_new(f->obs[i].var[j].v_float);
97
+ }
98
+ else if (f->obs[i].var[j].v_float >= pow(2, 127))
99
+ {
100
+ int dot = (f->obs[i].var[j].v_float - (float)pow(2, 127)) / (float)pow(2, 115);
101
+ if (dot == 0) symbol_name[3] = 0;
102
+ else symbol_name[4] = dot+96;
103
+ symbol_name[5] = 0;
104
+ var = rb_str_intern(ENCODED_STR_NEW2(symbol_name, "ASCII-8BIT"));
105
+ }
106
+ }
107
+ else if (f->obs[i].var[j].v_type == V_DOUBLE)
108
+ {
109
+ if (f->obs[i].var[j].v_double < pow(2, 1023))
110
+ {
111
+ var = rb_float_new(f->obs[i].var[j].v_double);
112
+ }
113
+ else if (f->obs[i].var[j].v_double >= pow(2, 1023))
114
+ {
115
+ int dot = (int)((f->obs[i].var[j].v_double - pow(2, 1023)) / pow(2, 1011));
116
+ if (dot == 0) symbol_name[3] = 0;
117
+ else symbol_name[4] = dot+96;
118
+ symbol_name[5] = 0;
119
+ var = rb_str_intern(ENCODED_STR_NEW2(symbol_name, "ASCII-8BIT"));
120
+ }
121
+ }
122
+
123
+ rb_ary_push(row, var);
124
+ }
125
+ rb_ary_push(data, row);
126
+ }
127
+ rb_hash_aset(r, ENCODED_STR_NEW2("data", "ASCII-8BIT"), data);
128
+
129
+
130
+ VALUE fields = rb_ary_new();
131
+ for (i = 0 ; i < f->nvar ; i++)
132
+ {
133
+ VALUE field = rb_hash_new();
134
+ rb_hash_aset(field, ENCODED_STR_NEW2("id", "ASCII-8BIT"), INT2NUM(i+1));
135
+ rb_hash_aset(field, ENCODED_STR_NEW2("type", "ASCII-8BIT"), INT2NUM(f->typlist[i]));
136
+ rb_hash_aset(field, ENCODED_STR_NEW2("name", "ASCII-8BIT"), ENCODED_STR_NEW2(f->varlist[i], "ASCII-8BIT"));
137
+ rb_hash_aset(field, ENCODED_STR_NEW2("format", "ASCII-8BIT"), ENCODED_STR_NEW2(f->fmtlist[i], "ASCII-8BIT"));
138
+ rb_hash_aset(field, ENCODED_STR_NEW2("variable_label", "ASCII-8BIT"), ENCODED_STR_NEW2(f->variable_labels[i], "ASCII-8BIT"));
139
+ rb_hash_aset(field, ENCODED_STR_NEW2("value_label", "ASCII-8BIT"), ENCODED_STR_NEW2(f->lbllist[i], "ASCII-8BIT"));
140
+ rb_hash_aset(field, ENCODED_STR_NEW2("sort", "ASCII-8BIT"), INT2NUM(f->srtlist[i]));
141
+ rb_ary_push(fields, field);
142
+ }
143
+ rb_hash_aset(r, ENCODED_STR_NEW2("fields", "ASCII-8BIT"), fields);
144
+
145
+
146
+ VALUE vlt = rb_ary_new();
147
+ for (i = 0 ; i < f->num_vlt ; i++)
148
+ {
149
+ VALUE v = rb_hash_new();
150
+ rb_hash_aset(v, ENCODED_STR_NEW2("name", "ASCII-8BIT"), ENCODED_STR_NEW2(f->vlt[i].name, "ASCII-8BIT"));
151
+ VALUE table = rb_ary_new();
152
+ for (j = 0 ; j < f->vlt[i].n ; j++)
153
+ {
154
+ VALUE row = rb_ary_new();
155
+ rb_ary_push(row, INT2NUM(f->vlt[i].val[j]));
156
+ rb_ary_push(row, ENCODED_STR_NEW2(f->vlt[i].txtbuf + f->vlt[i].off[j], "ASCII-8BIT"));
157
+
158
+ rb_ary_push(table, row);
159
+ }
160
+
161
+ rb_hash_aset(v, ENCODED_STR_NEW2("table", "ASCII-8BIT"), table);
162
+ rb_ary_push(vlt, v);
163
+ }
164
+ rb_hash_aset(r, ENCODED_STR_NEW2("value_labels", "ASCII-8BIT"), vlt);
165
+
166
+ free_stata(f);
167
+ return r;
168
+ }
Binary file
Binary file
@@ -0,0 +1,81 @@
1
+
2
+ #include <stdio.h>
3
+ #include <stdlib.h>
4
+ #include <math.h>
5
+ #include <string.h>
6
+ #include <inttypes.h>
7
+
8
+ #include "Stata.h"
9
+ #include "Read.h"
10
+ #include "Write.h"
11
+
12
+ FILE *fp;
13
+ int swap_endian_needed = 0;
14
+ int verbose = 0;
15
+ /* Returns the byteorder code for the host: 0x02 when the lowest-addressed byte of an int holds the value 1, 0x01 otherwise. */
16
+ char get_host_endian()
17
+ {
18
+ const int probe = 1;
19
+ const unsigned char * bytes = (const unsigned char *) &probe;
20
+ if (bytes[0] == 1) { return 0x02; } else { return 0x01; }
21
+ }
22
+
23
+ int set_error(struct stata_file * f, const char * error)
24
+ {
25
+ f->error = malloc(strlen(error) + 1);
26
+ strcpy(f->error, error);
27
+ return 0;
28
+ }
29
+
30
+ void free_stata(struct stata_file * f)
31
+ {
32
+ long i,j;
33
+
34
+ free(f->filename);
35
+ free(f->error);
36
+ for (i = 0 ; i < f->nvar ; i++) free(f->varlist[i]);
37
+ for (i = 0 ; i < f->nvar ; i++) free(f->fmtlist[i]);
38
+ for (i = 0 ; i < f->nvar ; i++) free(f->lbllist[i]);
39
+ for (i = 0 ; i < f->nvar ; i++) free(f->variable_labels[i]);
40
+ free(f->typlist);
41
+ free(f->varlist);
42
+ free(f->srtlist);
43
+ free(f->fmtlist);
44
+ free(f->lbllist);
45
+ free(f->variable_labels);
46
+
47
+ for (i = 0 ; i < f->nobs ; i++)
48
+ {
49
+ for (j = 0 ; j < f->nvar ; j++)
50
+ if (f->obs[i].var[j].v_type == V_STR)
51
+ free(f->obs[i].var[j].v_str);
52
+ free(f->obs[i].var);
53
+ }
54
+ free(f->obs);
55
+
56
+ for (i = 0 ; i < f->num_vlt ; i++)
57
+ {
58
+ free(f->vlt[i].off);
59
+ free(f->vlt[i].val);
60
+ free(f->vlt[i].txtbuf);
61
+ }
62
+ free(f->vlt);
63
+
64
+ free(f);
65
+ }
66
+
67
+ int main(int argc, char *argv[])
68
+ {
69
+ if (argc != 2) { fprintf(stderr, "usage:\n ./read [file]\n"); return(0); }
70
+
71
+ verbose = 1;
72
+
73
+ char src_file[100];
74
+ char dst_file[100];
75
+ sprintf(src_file, "%s.dta", argv[1]);
76
+ sprintf(dst_file, "%s_resave.dta", argv[1]);
77
+
78
+ struct stata_file * f = read_stata_file(src_file);
79
+ write_stata_file(dst_file, f);
80
+ return 0;
81
+ }
@@ -0,0 +1,72 @@
1
+
2
+ #ifndef STATA_H
3
+ #define STATA_H
4
+
5
+ #include <stdio.h>
6
+ #include <inttypes.h>
7
+
8
+ extern FILE * fp;
9
+ extern int swap_endian_needed;
10
+ extern int verbose;
11
+
12
+ enum stata_type {
13
+ V_INVALID, V_STR, V_BYTE, V_INT, V_LONG, V_FLOAT, V_DOUBLE
14
+ };
15
+
16
+ struct stata_var {
17
+ enum stata_type v_type;
18
+ union {
19
+ char * v_str;
20
+ int8_t v_byte;
21
+ int16_t v_int;
22
+ int32_t v_long;
23
+ float v_float;
24
+ double v_double;
25
+ };
26
+ };
27
+
28
+ struct stata_obs {
29
+ struct stata_var * var;
30
+ };
31
+
32
+ struct stata_vlt {
33
+ char name[33];
34
+ int32_t n;
35
+ int32_t txtlen;
36
+ int32_t * off;
37
+ int32_t * val;
38
+ char * txtbuf;
39
+ };
40
+
41
+ struct stata_file {
42
+ char * filename;
43
+
44
+ int8_t ds_format;
45
+ int8_t byteorder;
46
+ int8_t filetype;
47
+ int8_t unused;
48
+ uint16_t nvar;
49
+ uint32_t nobs;
50
+ char data_label[81];
51
+ char time_stamp[18];
52
+
53
+ char * error;
54
+
55
+ uint8_t * typlist;
56
+ char ** varlist;
57
+ uint16_t * srtlist;
58
+ char ** fmtlist;
59
+ char ** lbllist;
60
+ char ** variable_labels;
61
+
62
+ struct stata_obs * obs;
63
+
64
+ int num_vlt;
65
+ struct stata_vlt * vlt;
66
+ };
67
+
68
+ extern char get_host_endian();
69
+ extern int set_error(struct stata_file * f, const char * error);
70
+ extern void free_stata(struct stata_file * f);
71
+
72
+ #endif
Binary file
@@ -0,0 +1,28 @@
1
+
2
+ #include <ruby.h>
3
+ #include "Stata.h"
4
+
5
+ VALUE method_read(VALUE self, VALUE file);
6
+ VALUE method_write(VALUE self, VALUE file);
7
+ VALUE method_get_verbose(VALUE self);
8
+ VALUE method_set_verbose(VALUE self, VALUE value);
9
+
10
+ void Init_Stata() /* defines module Stata and its singleton methods read, write, verbose and verbose=; presumably invoked by Ruby when the extension is loaded */
11
+ {
12
+ VALUE Stata_module = rb_define_module("Stata");
13
+ rb_define_singleton_method(Stata_module, "read", method_read, 1);
14
+ rb_define_singleton_method(Stata_module, "write", method_write, 2); /* NOTE(review): registered with 2 arguments, but the prototype in this file declares method_write(self, file) - confirm against its definition */
15
+ rb_define_singleton_method(Stata_module, "verbose", method_get_verbose, 0);
16
+ rb_define_singleton_method(Stata_module, "verbose=", method_set_verbose, 1);
17
+ }
18
+
19
+ VALUE method_get_verbose(VALUE self) /* Stata.verbose: Qtrue when the C-level verbose flag is non-zero, Qfalse otherwise */
20
+ {
21
+ if (verbose != 0) { return Qtrue; } else { return Qfalse; }
22
+ }
23
+
24
+ VALUE method_set_verbose(VALUE self, VALUE value) /* Stata.verbose=: stores the truthiness of value in the verbose flag and returns the resulting setting */
25
+ {
26
+ int enabled = RTEST(value); verbose = enabled;
27
+ return method_get_verbose(self);
28
+ }
Binary file
@@ -0,0 +1,114 @@
1
+
2
+ #include "Write.h"
3
+ #include "Stata.h"
4
+ #include <stdio.h>
5
+ #include <stdlib.h>
6
+ #include <string.h>
7
+ #include <math.h>
8
+
9
+ struct stata_file;
10
+
11
+ int write_stata_file(char * filename, struct stata_file * f)
12
+ {
13
+ if (verbose) printf("write file '%s'\n", filename);
14
+
15
+ long i,j;
16
+
17
+ if (f == NULL) return 0;
18
+
19
+ fp = fopen(filename, "wb");
20
+ if (fp == NULL) return set_error(f, "error opening file");
21
+
22
+ if (f->nvar <= 0) return set_error(f, "nvar should be more then 0");
23
+ if (f->nobs <= 0) return set_error(f, "nobs should be more then 0");
24
+
25
+ /* 5.1 Headers */
26
+ char header[4] = {0x72, get_host_endian(), 0x01, 0x00};
27
+ if (fwrite(header, 4, 1, fp) != 1) return set_error(f, "fwrite to file failed");
28
+ if (fwrite(&f->nvar, sizeof(f->nvar), 1, fp) != 1) return set_error(f, "fwrite to file failed");
29
+ if (fwrite(&f->nobs, sizeof(f->nobs), 1, fp) != 1) return set_error(f, "fwrite to file failed");
30
+ if (fwrite(f->data_label, sizeof(f->data_label), 1, fp) != 1) return set_error(f, "fwrite to file failed");
31
+ f->time_stamp[17] = 0;
32
+ if (fwrite(f->time_stamp, sizeof(f->time_stamp), 1, fp) != 1) return set_error(f, "fwrite to file failed");
33
+
34
+
35
+ /* 5.2 Descriptors */
36
+ if (fwrite(f->typlist, 1, f->nvar, fp) != f->nvar) return set_error(f, "fwrite to file failed");
37
+ for (i = 0 ; i < f->nvar ; i++) if (fwrite(f->varlist[i], 33, 1, fp) != 1) return set_error(f, "fwrite to file failed");
38
+ if (fwrite(f->srtlist, 2, f->nvar+1, fp) != (unsigned int)f->nvar+1) return set_error(f, "fwrite to file failed");
39
+ for (i = 0 ; i < f->nvar ; i++) if (fwrite(f->fmtlist[i], 49, 1, fp) != 1) return set_error(f, "fwrite to file failed");
40
+ for (i = 0 ; i < f->nvar ; i++) if (fwrite(f->lbllist[i], 33, 1, fp) != 1) return set_error(f, "fwrite to file failed");
41
+
42
+
43
+ /* 5.3 Variable Labels */
44
+ for (i = 0 ; i < f->nvar ; i++) if (fwrite(f->variable_labels[i], 81, 1, fp) != 1) return set_error(f, "fwrite to file failed");
45
+
46
+
47
+ /* 5.4 Expansion Fields */
48
+ char zeros[5] = {0,0,0,0,0};
49
+ if (fwrite(zeros, 5, 1, fp) != 1) return set_error(f, "fwrite to file failed");
50
+
51
+
52
+ /* 5.5 Data */
53
+ /*printf(" write 5.5 Data (%dx%d)\n", f->nobs, f->nvar);*/
54
+ for (j = 0 ; j < f->nobs ; j++)
55
+ {
56
+ for (i = 0 ; i < f->nvar ; i++)
57
+ {
58
+ struct stata_var * var = &f->obs[j].var[i];
59
+ if (f->typlist[i] != 0 && f->typlist[i] < 245) { if (fwrite(var->v_str, f->typlist[i], 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
60
+ else if (f->typlist[i] == 251) { if (fwrite(&var->v_byte, sizeof(var->v_byte), 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
61
+ else if (f->typlist[i] == 252) { if (fwrite(&var->v_int, sizeof(var->v_int), 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
62
+ else if (f->typlist[i] == 253) { if (fwrite(&var->v_long, sizeof(var->v_long), 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
63
+ else if (f->typlist[i] == 254) { if (fwrite(&var->v_float, sizeof(var->v_float), 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
64
+ else if (f->typlist[i] == 255) { if (fwrite(&var->v_double, sizeof(var->v_double), 1, fp) != 1) return set_error(f, "fwrite to file failed"); }
65
+ }
66
+ }
67
+
68
+
69
+ /* 5.6 Value Labels */
70
+ /*printf(" write 5.6 Value Labels (%d)\n", f->num_vlt);*/
71
+ for (i = 0 ; i < f->num_vlt ; i++)
72
+ {
73
+ struct stata_vlt * vlt = &f->vlt[i];
74
+ uint32_t len = 4 + 4 + 4*vlt->n + 4*vlt->n + vlt->txtlen;
75
+ if (fwrite(&len, sizeof(len), 1, fp) != 1) return set_error(f, "fwrite to file failed");
76
+ if (fwrite(vlt->name, 33, 1, fp) != 1) return set_error(f, "fwrite to file failed");
77
+ if (fwrite(zeros, 3, 1, fp) != 1) return set_error(f, "fwrite to file failed");
78
+ if (fwrite(&vlt->n, sizeof(vlt->n), 1, fp) != 1) return set_error(f, "fwrite to file failed");
79
+ uint32_t txtlen = 0;
80
+ for (j = 0 ; j < vlt->n ; j++)
81
+ txtlen += (int)strlen(vlt->txtbuf + vlt->off[j]) + 1;
82
+
83
+ if (fwrite(&txtlen, sizeof(txtlen), 1, fp) != 1) return set_error(f, "fwrite to file failed");
84
+ if (fwrite(vlt->off, sizeof(uint32_t), vlt->n, fp) != (unsigned int)vlt->n) return set_error(f, "fwrite to file failed");
85
+ if (fwrite(vlt->val, sizeof(uint32_t), vlt->n, fp) != (unsigned int)vlt->n) return set_error(f, "fwrite to file failed");
86
+ if (fwrite(vlt->txtbuf, txtlen, 1, fp) != 1) return set_error(f, "fwrite to file failed");
87
+ }
88
+
89
+ fclose(fp);
90
+ return 0;
91
+ }
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
@@ -0,0 +1,10 @@
1
+
2
+ #ifndef STATA_WRITE_H
3
+ #define STATA_WRITE_H
4
+
5
+ #include <inttypes.h>
6
+
7
+ struct stata_file;
8
+ extern int write_stata_file(char * filename, struct stata_file * file);
9
+
10
+ #endif
Binary file
@@ -0,0 +1,259 @@
1
+
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+ #include <math.h>
5
+ #include <ruby.h>
6
+ #include "Stata.h"
7
+ #include "Write.h"
8
+
9
+ #ifndef RUBY_19
10
+ #ifndef RFLOAT_VALUE
11
+ #define RFLOAT_VALUE(v) (RFLOAT(v)->value)
12
+ #endif
13
+ #ifndef RARRAY_LEN
14
+ #define RARRAY_LEN(v) (RARRAY(v)->len)
15
+ #endif
16
+ #ifndef RARRAY_PTR
17
+ #define RARRAY_PTR(v) (RARRAY(v)->ptr)
18
+ #endif
19
+ #endif
20
+
21
+ int populate_fields_from_ruby_index = 0;
22
+ VALUE populate_fields_from_ruby(VALUE field, struct stata_file * f)
23
+ {
24
+ VALUE v;
25
+
26
+ v = rb_hash_aref(field, rb_str_new2("type"));
27
+ if (TYPE(v) != T_FIXNUM) rb_raise(rb_eArgError, "field type is not provided or is not a Fixnum");
28
+ f->typlist[populate_fields_from_ruby_index] = NUM2INT(v);
29
+
30
+ v = rb_hash_aref(field, rb_str_new2("name"));
31
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "field name is not provided or is not a Fixnum");
32
+ f->varlist[populate_fields_from_ruby_index] = (char*)malloc(33);
33
+ strncpy(f->varlist[populate_fields_from_ruby_index], rb_string_value_cstr(&v), 33);
34
+
35
+ v = rb_hash_aref(field, rb_str_new2("sort"));
36
+ if (TYPE(v) != T_FIXNUM) rb_raise(rb_eArgError, "field sort is not provided or is not a Fixnum");
37
+ f->srtlist[populate_fields_from_ruby_index] = NUM2INT(v);
38
+
39
+ v = rb_hash_aref(field, rb_str_new2("format"));
40
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "field format is not provided or is not a Fixnum");
41
+ f->fmtlist[populate_fields_from_ruby_index] = (char*)malloc(49);
42
+ strncpy(f->fmtlist[populate_fields_from_ruby_index], rb_string_value_cstr(&v), 49);
43
+
44
+ v = rb_hash_aref(field, rb_str_new2("value_label"));
45
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "field value_label is not provided or is not a String");
46
+ f->lbllist[populate_fields_from_ruby_index] = (char*)malloc(33);
47
+ strncpy(f->lbllist[populate_fields_from_ruby_index], rb_string_value_cstr(&v), 33);
48
+
49
+ v = rb_hash_aref(field, rb_str_new2("variable_label"));
50
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "field variable_label is not provided or is not a String");
51
+ f->variable_labels[populate_fields_from_ruby_index] = (char*)malloc(81);
52
+ strncpy(f->variable_labels[populate_fields_from_ruby_index], rb_string_value_cstr(&v), 81);
53
+
54
+ populate_fields_from_ruby_index++;
55
+ }
56
+
57
+ int populate_data_from_ruby_index = 0;
58
+ VALUE populate_data_from_ruby(VALUE row, struct stata_file * f)
59
+ {
60
+ VALUE v;
61
+ int j = 0;//, i = populate_data_from_ruby_index;
62
+ for (j = 0 ; j < f->nvar ; j++)
63
+ {
64
+ v = rb_ary_entry(row, j);
65
+ struct stata_var * var = &f->obs[populate_data_from_ruby_index].var[j];
66
+
67
+ if (f->typlist[j] == 255) var->v_type = V_DOUBLE;
68
+ else if (f->typlist[j] == 254) var->v_type = V_FLOAT;
69
+ else if (f->typlist[j] == 253) var->v_type = V_LONG;
70
+ else if (f->typlist[j] == 252) var->v_type = V_INT;
71
+ else if (f->typlist[j] == 251) var->v_type = V_BYTE;
72
+ else if (f->typlist[j] <= 244) var->v_type = f->typlist[j];
73
+ else rb_raise(rb_eArgError, "type is %d", f->typlist[j]);
74
+
75
+ switch (TYPE(v)) {
76
+ case T_SYMBOL:
77
+ v = rb_str_new2(rb_id2name(SYM2ID(v)));
78
+ const char * symbol_name = RSTRING_PTR(v);
79
+
80
+ int dot = 0;
81
+ if (strlen(symbol_name) == 5) dot = symbol_name[4] - 96;
82
+ if (dot < 0 || dot > 26) { rb_raise(rb_eArgError, "INVALID SYMBOL '%s'", symbol_name); continue; }
83
+
84
+ if (f->typlist[j] == 255) var->v_double = pow(2, 1023) + dot*pow(2, 1011);
85
+ else if (f->typlist[j] == 254) var->v_float = (float)pow(2, 127) + dot*(float)pow(2, 115);
86
+ else if (f->typlist[j] == 253) var->v_long = 2147483621 + dot;
87
+ else if (f->typlist[j] == 252) var->v_int = 32741 + dot;
88
+ else if (f->typlist[j] == 251) var->v_byte = 101 + dot;
89
+ else rb_raise(rb_eArgError, "invalid typlist '%d' %d", f->typlist[j], TYPE(v));
90
+ break;
91
+ case T_BIGNUM:
92
+ case T_FIXNUM:
93
+ case T_FLOAT:
94
+ if (f->typlist[j] == 255) var->v_double = rb_num2dbl(v);
95
+ else if (f->typlist[j] == 254) var->v_float = (float)rb_num2dbl(v);
96
+ else if (f->typlist[j] == 253) var->v_long = (int32_t)FIX2LONG(v);
97
+ else if (f->typlist[j] == 252) var->v_int = FIX2LONG(v);
98
+ else if (f->typlist[j] == 251) var->v_byte = FIX2LONG(v);
99
+ else rb_raise(rb_eArgError, "invalid typlist '%d' %d %f", f->typlist[j], TYPE(v), RFLOAT_VALUE(v));
100
+ break;
101
+ case T_STRING:
102
+ var->v_type = f->typlist[j];
103
+ var->v_str = (char*)malloc(f->typlist[j]+1);
104
+ strncpy(var->v_str, RSTRING_PTR(v), f->typlist[j]+1);
105
+ break;
106
+ case T_NIL:
107
+ rb_raise(rb_eArgError, "nil value submitted");
108
+ break;
109
+ default:
110
+ rb_raise(rb_eArgError, "unsupported ruby type: %d", TYPE(v));
111
+ break;
112
+ }
113
+ }
114
+ populate_data_from_ruby_index++;
115
+ }
116
+
117
+ int populate_value_labels_from_ruby_index = 0;
118
+ VALUE populate_value_labels_from_ruby(VALUE r_vlt, struct stata_file * f)
119
+ {
120
+ VALUE v;
121
+
122
+ f->num_vlt++;
123
+ f->vlt = (struct stata_vlt *)realloc(f->vlt, sizeof(struct stata_vlt)*f->num_vlt);
124
+ struct stata_vlt * vlt = &f->vlt[f->num_vlt-1];
125
+
126
+ if (TYPE(r_vlt) != T_HASH) rb_raise(rb_eArgError, "Value label table should be a Hash");
127
+
128
+ v = rb_hash_aref(r_vlt, rb_str_new2("name"));
129
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "Value label table name isn't provided or isn't a String");
130
+ strncpy(vlt->name, RSTRING_PTR(v), 33);
131
+
132
+ v = rb_hash_aref(r_vlt, rb_str_new2("table"));
133
+ if (TYPE(v) != T_ARRAY) rb_raise(rb_eArgError, "Value label table name isn't provided or isn't an Array");
134
+
135
+ vlt->n = (int32_t)RARRAY_LEN(v);
136
+ vlt->txtlen = 0;
137
+ vlt->off = (uint32_t*)malloc(sizeof(uint32_t)*vlt->n);
138
+ vlt->val = (uint32_t*)malloc(sizeof(uint32_t)*vlt->n);
139
+ vlt->txtbuf = NULL;
140
+
141
+ int i;
142
+ for (i = 0 ; i < RARRAY_LEN(v) ; i++)
143
+ {
144
+ VALUE r = rb_ary_entry(v, i);
145
+ if (TYPE(r) != T_ARRAY) rb_raise(rb_eArgError, "value label contains a row which isn't an Array");
146
+ if (TYPE(rb_ary_entry(r, 0)) != T_FIXNUM) rb_raise(rb_eArgError, "value label contains a value which isn't a Fixnum");
147
+ if (TYPE(rb_ary_entry(r, 1)) != T_STRING) rb_raise(rb_eArgError, "value label contains a label which isn't a String");
148
+ char * txt = RSTRING_PTR(rb_ary_entry(r, 1));
149
+ vlt->txtlen += (int32_t)strlen(txt)+1;
150
+ }
151
+ vlt->txtbuf = (char*)malloc(vlt->txtlen);
152
+
153
+ vlt->txtlen = 0;
154
+ for (i = 0 ; i < RARRAY_LEN(v) ; i++)
155
+ {
156
+ VALUE r = rb_ary_entry(v, i);
157
+ vlt->val[i] = NUM2INT(rb_ary_entry(r, 0));
158
+ char * txt = RSTRING_PTR(rb_ary_entry(r, 1));
159
+ vlt->txtlen += (int32_t)strlen(txt)+1;
160
+
161
+ vlt->off[i] = vlt->txtlen-((int32_t)strlen(txt)+1);
162
+ memcpy(vlt->txtbuf+vlt->off[i], txt, strlen(txt)+1);
163
+ }
164
+
165
+ populate_value_labels_from_ruby_index++;
166
+ }
167
+
168
+ VALUE method_write(VALUE self, VALUE filename, VALUE data)
169
+ {
170
+ VALUE v;
171
+ if (TYPE(data) != T_HASH) rb_raise(rb_eArgError, "Content to be written should be a hash");
172
+ if (TYPE(filename) != T_STRING) rb_raise(rb_eArgError, "Filename for writing is not a string");
173
+
174
+ if (rb_hash_aref(data, rb_str_new2("nvar")) == Qnil) rb_raise(rb_eArgError, "nvar is required");
175
+ if (rb_hash_aref(data, rb_str_new2("nobs")) == Qnil) rb_raise(rb_eArgError, "nobs is required");
176
+ if (rb_hash_aref(data, rb_str_new2("fields")) == Qnil) rb_raise(rb_eArgError, "no fields provided");
177
+ if (rb_hash_aref(data, rb_str_new2("data")) == Qnil) rb_raise(rb_eArgError, "no data provided");
178
+
179
+ struct stata_file * f = (struct stata_file *)malloc(sizeof(struct stata_file));
180
+ if (f == NULL) rb_raise(rb_eArgError, "Could not allocate memory for the stata file");
181
+ memset(f, 0, sizeof(struct stata_file));
182
+
183
+
184
+ /* 5.1 Headers */
185
+ v = rb_hash_aref(data, rb_str_new2("nvar"));
186
+ if (TYPE(v) != T_FIXNUM) rb_raise(rb_eArgError, "nvar is not provided or is not a fixnum");
187
+ f->nvar = NUM2UINT(v);
188
+
189
+ v = rb_hash_aref(data, rb_str_new2("nobs"));
190
+ if (TYPE(v) != T_FIXNUM) rb_raise(rb_eArgError, "nobs is not provided or is not a fixnum");
191
+ f->nobs = NUM2UINT(v);
192
+
193
+ v = rb_hash_aref(data, rb_str_new2("data_label"));
194
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "data_label is not provided or is not a string");
195
+ strncpy(f->data_label, rb_string_value_cstr(&v), sizeof(f->data_label));
196
+
197
+ v = rb_hash_aref(data, rb_str_new2("time_stamp"));
198
+ if (TYPE(v) != T_STRING) rb_raise(rb_eArgError, "time_stamp is not provided or is not a string");
199
+ strncpy(f->time_stamp, rb_string_value_cstr(&v), sizeof(f->time_stamp));
200
+
201
+
202
+ /* 5.2 and 5.3, Descriptors and Variable Labels */
203
+ f->typlist = (uint8_t *)malloc(f->nvar);
204
+ if (f->typlist == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
205
+ f->varlist = (char **)malloc(sizeof(char *)*f->nvar);
206
+ if (f->varlist == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
207
+ f->srtlist = (uint16_t *)malloc(sizeof(uint16_t)*(f->nvar+1));
208
+ if (f->srtlist == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
209
+ f->fmtlist = (char **)malloc(sizeof(char *)*f->nvar);
210
+ if (f->fmtlist == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
211
+ f->lbllist = (char **)malloc(sizeof(char *)*f->nvar);
212
+ if (f->lbllist == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
213
+ f->variable_labels = (char **)malloc(sizeof(char *)*f->nvar);
214
+ if (f->variable_labels == NULL) rb_raise(rb_eRuntimeError, "Could not allocate more memory");
215
+
216
+ v = rb_hash_aref(data, rb_str_new2("fields"));
217
+ if (TYPE(v) != T_ARRAY) rb_raise(rb_eArgError, "fields are not provided or are not an array");
218
+
219
+ populate_fields_from_ruby_index = 0;
220
+ rb_iterate(rb_each, v, populate_fields_from_ruby, (VALUE)f);
221
+
222
+
223
+ /* 5.3 Expansion Fields - nothing comes from ruby */
224
+
225
+
226
+ /* 5.4 Data */
227
+ long i, j;
228
+ f->obs = (struct stata_obs *)malloc(sizeof(struct stata_obs)*f->nobs);
229
+ for (j = 0 ; j < f->nobs ; j++)
230
+ {
231
+ f->obs[j].var = (struct stata_var *)malloc(sizeof(struct stata_var)*f->nvar);
232
+ for (i = 0 ; i < f->nvar ; i++)
233
+ {
234
+ struct stata_var * var = &f->obs[j].var[i];
235
+ memset(var, 0, sizeof(struct stata_var));
236
+ }
237
+ }
238
+ v = rb_hash_aref(data, rb_str_new2("data"));
239
+ if (TYPE(v) != T_ARRAY) rb_raise(rb_eArgError, "data is not provided or is not an array");
240
+
241
+ populate_data_from_ruby_index = 0;
242
+ rb_iterate(rb_each, v, populate_data_from_ruby, (VALUE)f);
243
+
244
+
245
+ /* 5.5 Value Label Tables */
246
+ v = rb_hash_aref(data, rb_str_new2("value_labels"));
247
+ if (TYPE(v) != T_ARRAY) rb_raise(rb_eArgError, "value labels are not provided or are not an array");
248
+
249
+ populate_value_labels_from_ruby_index = 0;
250
+ rb_iterate(rb_each, v, populate_value_labels_from_ruby, (VALUE)f);
251
+
252
+ write_stata_file(RSTRING_PTR(filename), f);
253
+
254
+ if (f->error) rb_raise(rb_eRuntimeError, "%s", f->error);
255
+
256
+ free_stata(f);
257
+
258
+ return INT2NUM(1);
259
+ }
Binary file
@@ -0,0 +1,11 @@
1
# Build script for the native extension: running it generates the Makefile.
# mkmf provides the dir_config and create_makefile helpers used below.
require 'mkmf'

# Name under which the extension is configured and built
target = 'Stata'

# Register the configuration options for this target
dir_config(target)

# Emit the Makefile
create_makefile(target)
Binary file
@@ -0,0 +1,42 @@
1
#!/usr/bin/env ruby
#encoding:ASCII-8BIT

# Round-trip check for the Stata extension: every input file is read, written
# back out as "<name>_resave<ext>", read again, and the two parsed structures
# are compared.  Exits with status 1 if any file did not survive the round trip.

# Warnings are switched on here rather than with "-w" on the shebang line,
# because many systems pass "ruby -w" to env as a single program name.
$VERBOSE = true

$LOAD_PATH << '.'
require 'Stata'

Stata.verbose = true

files = if ARGV.empty?
  Dir.glob("test_file_*.dta")
else
  ARGV
end

if files.empty?
  puts "usage: #{$0} file1.dta [file2.dta ...]"
  exit(-1)
end

failures = 0

files.each do |original|
  # Never feed our own output back in as an input.
  if original =~ /_resave/
    puts "skipping #{original.inspect}\n\n"
    next
  end

  ext = File.extname(original)
  base = original[0..-(ext.length+1)]
  resave = "#{base}_resave#{ext}"

  s1 = Stata.read(original)
  Stata.write(resave, s1)

  s2 = Stata.read(resave)

  ['data', 'fields', 'value_labels'].each do |f|
    if (s1[f] != s2[f])
      # Report the file being processed; the original referenced an
      # undefined local here and raised NameError on every mismatch.
      puts "ERROR '#{original}' #{f} did not read/write the same!"
      failures += 1
    end
  end

  puts "\n"
end

exit(1) if failures > 0
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: stata
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Kevin Branigan
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-01-18 00:00:00 +00:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 2
30
+ - 4
31
+ version: "2.4"
32
+ type: :development
33
+ version_requirements: *id001
34
+ description:
35
+ email:
36
+ executables: []
37
+
38
+ extensions:
39
+ - ext/extconf.rb
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - ext/extconf.rb
44
+ - ext/Makefile
45
+ - ext/Makefile_c
46
+ - ext/Read.c
47
+ - ext/Read.h
48
+ - ext/Read.o
49
+ - ext/Read.rb.c
50
+ - ext/Read.rb.o
51
+ - ext/Stata.bundle
52
+ - ext/Stata.c
53
+ - ext/Stata.h
54
+ - ext/Stata.o
55
+ - ext/Stata.rb.c
56
+ - ext/Stata.rb.o
57
+ - ext/stata_c_test
58
+ - ext/test.rb
59
+ - ext/Write.c
60
+ - ext/Write.h
61
+ - ext/Write.o
62
+ - ext/Write.rb.c
63
+ - ext/Write.rb.o
64
+ - LICENSE
65
+ - README
66
+ has_rdoc: true
67
+ homepage: http://github.com/unspace/stata
68
+ licenses:
69
+ - MIT
70
+ post_install_message:
71
+ rdoc_options: []
72
+
73
+ require_paths:
74
+ - ext
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ segments:
81
+ - 0
82
+ version: "0"
83
+ required_rubygems_version: !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ segments:
89
+ - 0
90
+ version: "0"
91
+ requirements: []
92
+
93
+ rubyforge_project:
94
+ rubygems_version: 1.3.7
95
+ signing_key:
96
+ specification_version: 3
97
+ summary: Read and write support for the Stata binary format
98
+ test_files: []
99
+