RubyGems - s2 - Versions diffs - 0.1.0 - Mend

s2 0.1.0

Files changed (23) hide show

checksums.yaml +7 -0
data/Rakefile +22 -0
data/bin/console +14 -0
data/bin/setup +8 -0
data/ext/s2/s2_parse/Makefile +52 -0
data/ext/s2/s2_parse/Parser.cpp +525 -0
data/ext/s2/s2_parse/Parser.h +130 -0
data/ext/s2/s2_parse/S2.hpp +246 -0
data/ext/s2/s2_parse/Scanner.cpp +796 -0
data/ext/s2/s2_parse/Scanner.h +263 -0
data/ext/s2/s2_parse/extconf.rb +5 -0
data/ext/s2/s2_parse/parse_s2.cpp +630 -0
data/ext/s2/s2_parse/parse_s2.hpp +42 -0
data/ext/s2/s2_parse/picojson.hpp +1299 -0
data/ext/s2/s2_parse/s2.atg +321 -0
data/ext/s2/s2_parse/s2.ruco +93 -0
data/ext/s2/s2_parse/s2_parse.cpp +70 -0
data/lib/s2.rb +5 -0
data/lib/s2/display.rb +21 -0
data/lib/s2/internal/c.rb +128 -0
data/lib/s2/s2_parse.rb +18 -0
data/lib/s2/version.rb +3 -0
metadata +151 -0

@@ -0,0 +1,130 @@
+#if !defined(S2_COCO_PARSER_H__)
+#define S2_COCO_PARSER_H__
+#include <iostream>
+#include <memory>
+#include "S2.hpp"
+#include "Scanner.h"
+namespace S2 {
+// Value object for one parser diagnostic: the position it refers to and its text.
+class ParserException {
+	int line;              // line given at construction
+	int col;               // column given at construction
+	std::wstring message;  // diagnostic text given at construction
+public:
+	// Stores the three arguments unchanged.
+	ParserException(int line, int col, std::wstring message)
+		: line(line), col(col), message(message) {}
+	// Line given at construction.
+	int LineNumber() const { return line; }
+	// Column given at construction.
+	int ColumnNumber() const { return col; }
+	// Copy of the stored diagnostic text.
+	std::wstring GetMessage() const { return message; }
+};
+class Errors { // diagnostic sink referenced by Parser::errors; member functions are only declared in this header
+public:
+	int count;			// number of errors detected
+	std::vector<ParserException> warnings; // ParserException records held by value; presumably appended by Warning() -- TODO confirm in Parser.cpp
+	Errors();
+	void SynErr(int line, int col, int n); // syntax error at line/col; n presumably selects a generated message -- verify in Parser.cpp
+	void Error(int line, int col, const wchar_t *s); // error with text s at line/col
+	void Warning(int line, int col, const wchar_t *s); // warning with text s at line/col
+	void Warning(const wchar_t *s); // warning overload that takes no position
+	void Exception(const wchar_t *s); // NOTE(review): effect not visible here (may abort or throw) -- check Parser.cpp
+}; // Errors
+class Parser { // parser interface with one method per grammar production; bodies live outside this header
+private:
+	enum { // token kind codes; they agree with the t->kind values assigned in Scanner::NextToken
+		_EOF=0,
+		_pascalcase=1,
+		_camelcase=2,
+		_number=3,
+		_hexinteger=4,
+		_string=5,
+		_badString=6,
+		_char=7,
+		_endOfLine=8,
+		_customTokenTypeVariable=9,
+		_ddtSym=22, // 22 and 23 exceed the scanner's maxT (21), so Scanner::Peek skips them as pragmas
+		_optionSym=23
+	};
+	int maxT; // presumably mirrors Scanner::maxT -- TODO confirm in Parser.cpp
+	Token *dummyToken;
+	int errDist; // NOTE(review): looks like error-recovery distance bookkeeping -- confirm in Parser.cpp
+	int minErrDist;
+	void SynErr(int n);
+	void Get();
+	void Expect(int n);
+	bool StartOf(int s);
+	void ExpectWeak(int n, int follow);
+	bool WeakSeparator(int n, int syFol, int repFol);
+public:
+	Scanner *scanner; // token source passed to the constructor; ownership is not visible from this header
+	Errors  *errors;
+	Token *t;			// last recognized token
+	Token *la;			// lookahead token
+S2Ptr s2; // parse result; presumably filled by the S2() production -- verify in Parser.cpp
+	Parser(Scanner *scanner);
+	~Parser();
+	void SemErr(const wchar_t* msg);
+	void S2(); // the production methods below take the node to populate by reference
+	void Statement(StatementPtr& production);
+	void TypeVariable(TypeVariablePtr& production);
+	void StructName(StructNamePtr& production);
+	void MemberName(MemberNamePtr& production);
+	void NumberLiteral(NumberLiteralPtr& production);
+	void StringLiteral(StringLiteralPtr& production);
+	void TypeIdentifier(TypeIdentifierPtr& production);
+	void TypeParameterArguments(TypeParameterArgumentsPtr& production);
+	void TypeExpression(TypeExpressionPtr& production);
+	void TypeDeclaration(TypeDeclarationPtr& production);
+	void TypeParameters(TypeParametersPtr& production);
+	void NumberLit(NumberLitPtr& production);
+	void Expression(ExpressionPtr& production);
+	void AttributeParam(AttributeParamPtr& production);
+	void AttributeParamList(AttributeParamListPtr& production);
+	void Attribute(AttributePtr& production);
+	void Member(MemberPtr& production);
+	void Structure(StructurePtr& production);
+	void Import(ImportPtr& production);
+	void Parse(); // entry point; presumably runs the S2() production over the scanner input -- TODO confirm
+}; // end Parser
+} // namespace
+#endif

data/ext/s2/s2_parse/S2.hpp ADDED

@@ -0,0 +1,246 @@
+#ifndef S2_HPP
+#define S2_HPP
+/*
+	WARNING: This file is generated using ruco. Please modify the .ruco file if you wish to change anything
+	https://github.com/davidsiaw/ruco
+*/
+#include <string>
+#include <memory>
+#include <vector>
+namespace S2
+{
+// Discriminator returned by Statement::get_statement_type().
+enum StatementType
+{
+	STRUCTURE_STATEMENT,
+	IMPORT_STATEMENT
+};
+// Abstract base of the statement nodes (Structure and Import derive from it).
+// NOTE: this header is generated by ruco; mirror this change in the generator
+// template so that it survives regeneration.
+class Statement
+{
+public:
+	// Position fields; presumably the source line/column of the node -- they are
+	// assigned outside this header.
+	unsigned _line, _col;
+	// Virtual destructor: nodes are handled through StatementPtr, and a
+	// polymorphic base must be safely destructible through a base pointer.
+	virtual ~Statement() {}
+	// Identifies the concrete statement kind without RTTI.
+	virtual StatementType get_statement_type() const = 0;
+};
+typedef std::shared_ptr<Statement> StatementPtr;
+typedef std::vector<StatementPtr> StatementArray;
+// Node type for the S2 production: an ordered list of statements.
+class S2 {
+public:
+	unsigned _line;             // position fields, assigned outside this header
+	unsigned _col;
+	StatementArray statements;  // shared pointers to the contained Statement nodes
+};
+using S2Ptr = std::shared_ptr<S2>;
+using S2Array = std::vector<S2Ptr>;
+// Leaf node carrying the text of a struct name.
+class StructName {
+public:
+	unsigned _line;        // position fields, assigned outside this header
+	unsigned _col;
+	std::wstring content;  // text of the name
+};
+using StructNamePtr = std::shared_ptr<StructName>;
+using StructNameArray = std::vector<StructNamePtr>;
+// Leaf node carrying the text of a type variable.
+class TypeVariable {
+public:
+	unsigned _line;        // position fields, assigned outside this header
+	unsigned _col;
+	std::wstring content;  // text of the type variable
+};
+using TypeVariablePtr = std::shared_ptr<TypeVariable>;
+using TypeVariableArray = std::vector<TypeVariablePtr>;
+// Discriminator returned by TypeExpression::get_typeexpression_type().
+enum TypeExpressionType
+{
+	TYPEIDENTIFIER_TYPEEXPRESSION
+};
+// Abstract base of the type-expression nodes (TypeIdentifier derives from it).
+// NOTE: this header is generated by ruco; mirror this change in the generator
+// template so that it survives regeneration.
+class TypeExpression
+{
+public:
+	// Position fields; presumably the source line/column of the node -- they are
+	// assigned outside this header.
+	unsigned _line, _col;
+	// Virtual destructor: nodes are handled through TypeExpressionPtr, and a
+	// polymorphic base must be safely destructible through a base pointer.
+	virtual ~TypeExpression() {}
+	// Identifies the concrete type-expression kind without RTTI.
+	virtual TypeExpressionType get_typeexpression_type() const = 0;
+};
+typedef std::shared_ptr<TypeExpression> TypeExpressionPtr;
+typedef std::vector<TypeExpressionPtr> TypeExpressionArray;
+// Node holding a list of type expressions.
+class TypeParameterArguments {
+public:
+	unsigned _line;                       // position fields, assigned outside this header
+	unsigned _col;
+	TypeExpressionArray typeexpressions;  // the contained type-expression nodes
+};
+using TypeParameterArgumentsPtr = std::shared_ptr<TypeParameterArguments>;
+using TypeParameterArgumentsArray = std::vector<TypeParameterArgumentsPtr>;
+// Concrete TypeExpression made of a struct name, a type variable and
+// type-parameter argument lists.
+class TypeIdentifier : public TypeExpression {
+public:
+	// Redeclares _line/_col, hiding the members of the same name in TypeExpression.
+	unsigned _line;
+	unsigned _col;
+	StructNamePtr structname;
+	TypeVariablePtr typevariable;
+	TypeParameterArgumentsArray typeparameterarguments;
+	// Always reports TYPEIDENTIFIER_TYPEEXPRESSION.
+	virtual TypeExpressionType get_typeexpression_type() const { return TYPEIDENTIFIER_TYPEEXPRESSION; }
+};
+using TypeIdentifierPtr = std::shared_ptr<TypeIdentifier>;
+using TypeIdentifierArray = std::vector<TypeIdentifierPtr>;
+// Node holding a list of type variables.
+class TypeParameters {
+public:
+	unsigned _line;                   // position fields, assigned outside this header
+	unsigned _col;
+	TypeVariableArray typevariables;  // the contained type-variable nodes
+};
+using TypeParametersPtr = std::shared_ptr<TypeParameters>;
+using TypeParametersArray = std::vector<TypeParametersPtr>;
+// Node pairing a struct name with its type-parameter lists.
+class TypeDeclaration {
+public:
+	unsigned _line;                      // position fields, assigned outside this header
+	unsigned _col;
+	StructNamePtr structname;            // the declared name
+	TypeParametersArray typeparameters;  // the contained type-parameter nodes
+};
+using TypeDeclarationPtr = std::shared_ptr<TypeDeclaration>;
+using TypeDeclarationArray = std::vector<TypeDeclarationPtr>;
+// Leaf node carrying the text of a number literal.
+class NumberLiteral {
+public:
+	unsigned _line;        // position fields, assigned outside this header
+	unsigned _col;
+	std::wstring content;  // text of the literal
+};
+using NumberLiteralPtr = std::shared_ptr<NumberLiteral>;
+using NumberLiteralArray = std::vector<NumberLiteralPtr>;
+// Discriminator returned by Expression::get_expression_type().
+enum ExpressionType
+{
+	NUMBERLIT_EXPRESSION
+};
+// Abstract base of the expression nodes (NumberLit derives from it).
+// NOTE: this header is generated by ruco; mirror this change in the generator
+// template so that it survives regeneration.
+class Expression
+{
+public:
+	// Position fields; presumably the source line/column of the node -- they are
+	// assigned outside this header.
+	unsigned _line, _col;
+	// Virtual destructor: nodes are handled through ExpressionPtr, and a
+	// polymorphic base must be safely destructible through a base pointer.
+	virtual ~Expression() {}
+	// Identifies the concrete expression kind without RTTI.
+	virtual ExpressionType get_expression_type() const = 0;
+};
+typedef std::shared_ptr<Expression> ExpressionPtr;
+typedef std::vector<ExpressionPtr> ExpressionArray;
+// Concrete Expression wrapping a single number literal.
+class NumberLit : public Expression {
+public:
+	// Redeclares _line/_col, hiding the members of the same name in Expression.
+	unsigned _line;
+	unsigned _col;
+	NumberLiteralPtr numberliteral;
+	// Always reports NUMBERLIT_EXPRESSION.
+	virtual ExpressionType get_expression_type() const { return NUMBERLIT_EXPRESSION; }
+};
+using NumberLitPtr = std::shared_ptr<NumberLit>;
+using NumberLitArray = std::vector<NumberLitPtr>;
+// Leaf node carrying the text of a member name.
+class MemberName {
+public:
+	unsigned _line;        // position fields, assigned outside this header
+	unsigned _col;
+	std::wstring content;  // text of the name
+};
+using MemberNamePtr = std::shared_ptr<MemberName>;
+using MemberNameArray = std::vector<MemberNamePtr>;
+// Node pairing a member name with an expression.
+class AttributeParam {
+public:
+	unsigned _line;            // position fields, assigned outside this header
+	unsigned _col;
+	MemberNamePtr membername;  // the name part
+	ExpressionPtr expression;  // the expression part
+};
+using AttributeParamPtr = std::shared_ptr<AttributeParam>;
+using AttributeParamArray = std::vector<AttributeParamPtr>;
+// Node holding a list of attribute parameters.
+class AttributeParamList {
+public:
+	unsigned _line;                       // position fields, assigned outside this header
+	unsigned _col;
+	AttributeParamArray attributeparams;  // the contained AttributeParam nodes
+};
+using AttributeParamListPtr = std::shared_ptr<AttributeParamList>;
+using AttributeParamListArray = std::vector<AttributeParamListPtr>;
+// Node made of a type expression plus attribute parameter lists.
+class Attribute {
+public:
+	unsigned _line;                               // position fields, assigned outside this header
+	unsigned _col;
+	TypeExpressionPtr typeexpression;             // the type-expression part
+	AttributeParamListArray attributeparamlists;  // the contained parameter-list nodes
+};
+using AttributePtr = std::shared_ptr<Attribute>;
+using AttributeArray = std::vector<AttributePtr>;
+// Node made of attributes, a type identifier and member names.
+class Member {
+public:
+	unsigned _line;                    // position fields, assigned outside this header
+	unsigned _col;
+	AttributeArray attributes;         // the contained Attribute nodes
+	TypeIdentifierPtr typeidentifier;  // the type part
+	MemberNameArray membernames;       // the contained MemberName nodes
+};
+using MemberPtr = std::shared_ptr<Member>;
+using MemberArray = std::vector<MemberPtr>;
+// Concrete Statement made of attributes, a type declaration and members.
+class Structure : public Statement {
+public:
+	// Redeclares _line/_col, hiding the members of the same name in Statement.
+	unsigned _line;
+	unsigned _col;
+	AttributeArray attributes;
+	TypeDeclarationPtr typedeclaration;
+	MemberArray members;
+	// Always reports STRUCTURE_STATEMENT.
+	virtual StatementType get_statement_type() const { return STRUCTURE_STATEMENT; }
+};
+using StructurePtr = std::shared_ptr<Structure>;
+using StructureArray = std::vector<StructurePtr>;
+// Leaf node carrying the text of a string literal.
+class StringLiteral {
+public:
+	unsigned _line;        // position fields, assigned outside this header
+	unsigned _col;
+	std::wstring content;  // text of the literal
+};
+using StringLiteralPtr = std::shared_ptr<StringLiteral>;
+using StringLiteralArray = std::vector<StringLiteralPtr>;
+// Concrete Statement wrapping a single string literal.
+class Import : public Statement {
+public:
+	// Redeclares _line/_col, hiding the members of the same name in Statement.
+	unsigned _line;
+	unsigned _col;
+	StringLiteralPtr stringliteral;
+	// Always reports IMPORT_STATEMENT.
+	virtual StatementType get_statement_type() const { return IMPORT_STATEMENT; }
+};
+using ImportPtr = std::shared_ptr<Import>;
+using ImportArray = std::vector<ImportPtr>;
+}
+#endif // S2_HPP

data/ext/s2/s2_parse/Scanner.cpp ADDED

@@ -0,0 +1,796 @@
+#include <memory.h>
+#include <string.h>
+#include "Scanner.h"
+namespace S2 {
+// string handling, wide character
+// Returns a newly allocated copy of the whole of `value` by delegating to the
+// (value, startIndex) overload with a start index of 0.
+wchar_t* coco_string_create(const wchar_t* value) {
+	const int wholeStringStart = 0;
+	return coco_string_create(value, wholeStringStart);
+}
+// Returns a newly allocated copy of `value` from `startIndex` to its end.
+// A null `value` is forwarded with a length of 0.
+wchar_t* coco_string_create(const wchar_t *value, int startIndex) {
+	if (!value) {
+		return coco_string_create(value, startIndex, 0);
+	}
+	const int total = wcslen(value);
+	return coco_string_create(value, startIndex, total - startIndex);
+}
+// Returns a newly allocated, zero-terminated copy of `length` characters of
+// `value` starting at `startIndex`. The caller releases it with
+// coco_string_delete / delete [].
+// A null `value` or a non-positive `length` yields an empty string. Previously a
+// null `value` was still indexed and handed to wcsncpy, and a length of -1
+// allocated zero elements and then wrote the terminator at data[-1].
+wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) {
+	if (!value || length <= 0) {
+		wchar_t* empty = new wchar_t[1];
+		empty[0] = 0;
+		return empty;
+	}
+	wchar_t* data = new wchar_t[length + 1];
+	wcsncpy(data, value + startIndex, length);
+	// wcsncpy does not terminate when the source is at least `length` long
+	data[length] = 0;
+	return data;
+}
+// Returns a newly allocated copy of `data` with the letters a-z mapped to A-Z;
+// every other character is copied unchanged. Returns NULL for a null input.
+wchar_t* coco_string_create_upper(const wchar_t* data) {
+	if (data == NULL) { return NULL; }
+	const int count = wcslen(data);
+	wchar_t* upper = new wchar_t[count + 1];
+	for (int k = 0; k < count; ++k) {
+		const wchar_t c = data[k];
+		const bool isLower = (c >= L'a') && (c <= L'z');
+		upper[k] = isLower ? c + (L'A' - L'a') : c;
+	}
+	upper[count] = L'\0';
+	return upper;
+}
+// Returns a newly allocated lower-cased copy of the whole of `data`, or NULL
+// for a null input. The work is done by the (data, startIndex, dataLen) overload.
+wchar_t* coco_string_create_lower(const wchar_t* data) {
+	if (data == NULL) { return NULL; }
+	const int wholeLength = wcslen(data);
+	return coco_string_create_lower(data, 0, wholeLength);
+}
+// Returns a newly allocated, zero-terminated copy of the `dataLen` characters of
+// `data` starting at `startIndex`, with the letters A-Z mapped to a-z.
+// Returns NULL for a null input.
+wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) {
+	if (!data) { return NULL; }
+	wchar_t* newData = new wchar_t[dataLen + 1];
+	// Stop at dataLen: the previous `i <= dataLen` bound also read
+	// data[startIndex + dataLen], one element past the requested range, only to
+	// overwrite the copy with the terminator below.
+	for (int i = 0; i < dataLen; i++) {
+		wchar_t ch = data[startIndex + i];
+		if ((L'A' <= ch) && (ch <= L'Z')) {
+			newData[i] = ch - (L'A' - L'a');
+		}
+		else { newData[i] = ch; }
+	}
+	newData[dataLen] = L'\0';
+	return newData;
+}
+// Returns a newly allocated string holding `data1` followed by `data2`.
+// Either argument may be null, in which case it contributes nothing.
+wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) {
+	const int firstLen = data1 ? (int) wcslen(data1) : 0;
+	const int secondLen = data2 ? (int) wcslen(data2) : 0;
+	wchar_t* joined = new wchar_t[firstLen + secondLen + 1];
+	if (data1) { wcscpy(joined, data1); }
+	if (data2) { wcscpy(joined + firstLen, data2); }
+	joined[firstLen + secondLen] = 0;
+	return joined;
+}
+// Returns a newly allocated string holding `target` followed by the single
+// character `appendix`. A null `target` is treated as the empty string.
+wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) {
+	int targetLen = coco_string_length(target);
+	wchar_t* data = new wchar_t[targetLen + 2];
+	// coco_string_length() accepts a null target, but wcsncpy requires a valid
+	// source pointer even for a zero count, so only copy when there is something to copy.
+	if (targetLen > 0) { wcsncpy(data, target, targetLen); }
+	data[targetLen] = appendix;
+	data[targetLen + 1] = 0;
+	return data;
+}
+// Frees a string obtained from the coco_string_create* family and resets the
+// caller's pointer to NULL.
+void coco_string_delete(wchar_t* &data) {
+	wchar_t* doomed = data;
+	data = NULL;
+	delete [] doomed;
+}
+// Length of `data` in characters; 0 for a null pointer.
+int coco_string_length(const wchar_t* data) {
+	return data ? (int) wcslen(data) : 0;
+}
+// True when `data` ends with `end`. Both arguments must be non-null.
+bool coco_string_endswith(const wchar_t* data, const wchar_t *end) {
+	const int dataLen = wcslen(data);
+	const int endLen = wcslen(end);
+	if (endLen > dataLen) { return false; }
+	return wcscmp(data + (dataLen - endLen), end) == 0;
+}
+// Index of the first occurrence of `value` in `data`, or -1 when absent.
+int coco_string_indexof(const wchar_t* data, const wchar_t value) {
+	const wchar_t* hit = wcschr(data, value);
+	return hit ? (int) (hit - data) : -1;
+}
+// Index of the last occurrence of `value` in `data`, or -1 when absent.
+int coco_string_lastindexof(const wchar_t* data, const wchar_t value) {
+	const wchar_t* hit = wcsrchr(data, value);
+	return hit ? (int) (hit - data) : -1;
+}
+// Replaces `target` with a newly allocated "target + appendix" and frees the
+// old `target`. Does nothing when `appendix` is null.
+void coco_string_merge(wchar_t* &target, const wchar_t* appendix) {
+	if (appendix == NULL) { return; }
+	wchar_t* combined = coco_string_create_append(target, appendix);
+	delete [] target;
+	target = combined;
+}
+// True when both strings compare equal under wcscmp. Both must be non-null.
+bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) {
+	const int order = wcscmp(data1, data2);
+	return order == 0;
+}
+// Three-way comparison of the two strings; the result is that of wcscmp.
+int coco_string_compareto(const wchar_t* data1, const wchar_t* data2) {
+	const int order = wcscmp(data1, data2);
+	return order;
+}
+// Returns a non-negative hash of `data` (0 for a null pointer), computed as
+// h = (h * 7) ^ ch over all characters.
+// The accumulation is done in unsigned arithmetic: the previous signed
+// `h * 7` overflowed for longer strings (undefined behaviour), and `-h`
+// overflowed for the most negative value, which could leave the result negative.
+// For every input whose old computation was well defined the result is unchanged.
+int coco_string_hash(const wchar_t *data) {
+	if (!data) { return 0; }
+	unsigned int acc = 0;
+	while (*data != 0) {
+		acc = (acc * 7u) ^ (unsigned int) *data;
+		++data;
+	}
+	// Fold "negative" values (sign bit set) back to their magnitude.
+	const unsigned int signBit = ~(~0u >> 1);
+	if (acc & signBit) {
+		acc = 0u - acc;
+		// Only the most negative value is its own negation; map it to 0.
+		if (acc & signBit) { acc = 0; }
+	}
+	return (int) acc;
+}
+// string handling, ascii character
+// Returns a newly allocated wide string built by casting each char of `value`
+// to wchar_t. A null `value` yields an empty string.
+wchar_t* coco_string_create(const char* value) {
+	const int count = value ? (int) strlen(value) : 0;
+	wchar_t* wide = new wchar_t[count + 1];
+	int k = 0;
+	while (k < count) {
+		wide[k] = (wchar_t) value[k];
+		++k;
+	}
+	wide[count] = 0;
+	return wide;
+}
+// Returns a newly allocated narrow string built by casting each wchar_t of
+// `value` to char. A null `value` yields an empty string.
+char* coco_string_create_char(const wchar_t *value) {
+	const int count = coco_string_length(value);
+	char *narrow = new char[count + 1];
+	int k = 0;
+	while (k < count) {
+		narrow[k] = (char) value[k];
+		++k;
+	}
+	narrow[count] = 0;
+	return narrow;
+}
+// Frees a narrow string obtained from coco_string_create_char and resets the
+// caller's pointer to NULL.
+void coco_string_delete(char* &data) {
+	char* doomed = data;
+	data = NULL;
+	delete [] doomed;
+}
+// Creates an empty token: kind 0, all positions 0, no text and no successor.
+Token::Token() {
+	kind = 0;
+	pos  = 0;
+	// charPos is assigned alongside pos/col/line in Scanner::NextToken; it was
+	// the only position field left uninitialised here.
+	charPos = 0;
+	col  = 0;
+	line = 0;
+	val  = NULL;
+	next = NULL;
+}
+// Releases the token text with delete [] and clears the pointer.
+Token::~Token() {
+	delete [] val;
+	val = NULL;
+}
+Buffer::Buffer(FILE* s, bool isUserStream) { // wraps stdio stream s; isUserStream is stored and later consulted by Close()
+// ensure binary read on windows
+#if _MSC_VER >= 1300
+	_setmode(_fileno(s), _O_BINARY);
+#endif
+	stream = s; this->isUserStream = isUserStream;
+	if (CanSeek()) { // seekable stream: measure its length by seeking to the end and back
+		fseek(s, 0, SEEK_END);
+		fileLen = ftell(s);
+		fseek(s, 0, SEEK_SET);
+		bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; // buffer at most COCO_MAX_BUFFER_LENGTH bytes at a time
+		bufStart = INT_MAX; // nothing in the buffer so far
+	} else {
+		fileLen = bufLen = bufStart = 0; // length unknown up front; ReadNextStreamChunk() grows these as data arrives
+	}
+	bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; // never allocate a zero-sized buffer
+	buf = new char[bufCapacity];
+	if (fileLen > 0) SetPos(0);          // setup  buffer to position 0 (start)
+	else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
+	if (bufLen == fileLen && CanSeek()) Close(); // the whole file fits the buffer, so the stream is not needed any more (Close() leaves user streams open)
+}
+// Takes over the state of `b`: position and length fields are copied, while the
+// byte storage and the stream are moved (b->buf and b->stream are set to NULL so
+// that destroying `b` neither frees the storage nor closes the stream).
+Buffer::Buffer(Buffer *b) {
+	// plain copies
+	bufCapacity = b->bufCapacity;
+	bufStart = b->bufStart;
+	bufLen = b->bufLen;
+	fileLen = b->fileLen;
+	bufPos = b->bufPos;
+	isUserStream = b->isUserStream;
+	// ownership transfers
+	buf = b->buf;
+	b->buf = NULL;
+	stream = b->stream;
+	b->stream = NULL;
+}
+// Builds a buffer over a private copy of the `len` bytes at `buf`; no stream is
+// attached, so all reads are served from memory.
+Buffer::Buffer(const char* buf, size_t len) {
+	this->buf = new char[len];
+	// memcpy requires valid pointers even for a zero count, so skip it for empty input
+	if (len > 0) {
+		memcpy(this->buf, buf, len*sizeof(unsigned char));
+	}
+	bufStart = 0;
+	bufCapacity = bufLen = len;
+	fileLen = len;
+	bufPos = 0;
+	stream = NULL;
+	// Close(), called from the destructor, reads this flag before it tests the
+	// stream; it was previously left uninitialised on this construction path.
+	isUserStream = false;
+}
+// Closes the stream (subject to the rules in Close()) and frees the byte storage.
+Buffer::~Buffer() {
+	Close();
+	// delete [] on a null pointer is a no-op, so no explicit test is needed
+	delete [] buf;
+	buf = NULL;
+}
+// Closes the underlying stream unless it is a user stream or already gone.
+void Buffer::Close() {
+	if (isUserStream || stream == NULL) {
+		return;
+	}
+	fclose(stream);
+	stream = NULL;
+}
+// Returns the next byte as a value in 0..255 and advances the position, or EoF
+// when the input is exhausted.
+// The storage is allocated as char[] in this file. Where char is signed, a byte
+// >= 0x80 used to be returned as a negative int, which can never satisfy the
+// `ch == 0xEF` byte-order-mark test in Scanner::Init or the `ch >= 128` test in
+// UTF8Buffer::Read. Converting through unsigned char fixes that.
+int Buffer::Read() {
+	if (bufPos < bufLen) {
+		return (unsigned char) buf[bufPos++];
+	} else if (GetPos() < fileLen) {
+		SetPos(GetPos()); // shift buffer start to Pos
+		return (unsigned char) buf[bufPos++];
+	} else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) {
+		// non-seekable stream: more data arrived and was appended to the buffer
+		return (unsigned char) buf[bufPos++];
+	} else {
+		return EoF;
+	}
+}
+// Returns what Read() would return next, then restores the position.
+int Buffer::Peek() {
+	const int restorePos = GetPos();
+	const int peeked = Read();
+	SetPos(restorePos);
+	return peeked;
+}
+// Returns a newly allocated string holding the characters read between the byte
+// positions `beg` (zero-based, inclusive) and `end` (zero-based, exclusive).
+// The current position is restored before returning.
+wchar_t* Buffer::GetString(int beg, int end) {
+	wchar_t *scratch = new wchar_t[end - beg];
+	const int savedPos = GetPos();
+	int count = 0;
+	for (SetPos(beg); GetPos() < end; ++count) {
+		scratch[count] = (wchar_t) Read();
+	}
+	SetPos(savedPos);
+	wchar_t *result = coco_string_create(scratch, 0, count);
+	coco_string_delete(scratch);
+	return result;
+}
+// Current absolute position: the buffer's start offset plus the index within it.
+int Buffer::GetPos() {
+	const int absolute = bufStart + bufPos;
+	return absolute;
+}
+void Buffer::SetPos(int value) { // moves the read position to absolute offset value, refilling the buffer from the stream when needed
+	if ((value >= fileLen) && (stream != NULL) && !CanSeek()) {
+		// Wanted position is after buffer and the stream
+		// is not seek-able e.g. network or console,
+		// thus we have to read the stream manually till
+		// the wanted position is in sight.
+		while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); // empty body: each call extends fileLen until value is covered or the stream ends
+	}
+	if ((value < 0) || (value > fileLen)) { // value == fileLen (one past the last byte) is accepted
+		wprintf(L"--- buffer out of bounds access, position: %d\n", value);
+		exit(1); // terminates the whole process
+	}
+	if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer
+		bufPos = value - bufStart;
+	} else if (stream != NULL) { // must be swapped in
+		fseek(stream, value, SEEK_SET);
+		bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); // refill from the new offset; return values of fseek/fread are not checked for errors
+		bufStart = value; bufPos = 0;
+	} else {
+		bufPos = fileLen - bufStart; // make Pos return fileLen
+	}
+}
+// Appends the next chunk of bytes from the stream to the buffer, doubling the
+// buffer first when it is full, and updates bufLen and fileLen accordingly.
+// Returns the number of bytes read, or 0 at the end of the stream.
+int Buffer::ReadNextStreamChunk() {
+	int room = bufCapacity - bufLen;
+	if (room == 0) {
+		// The stream can neither be seeked nor measured in advance, so the
+		// buffer has to grow on demand.
+		bufCapacity = bufLen * 2;
+		char *grown = new char[bufCapacity];
+		memcpy(grown, buf, bufLen*sizeof(char));
+		delete [] buf;
+		buf = grown;
+		room = bufLen;
+	}
+	const int got = fread(buf + bufLen, sizeof(unsigned char), room, stream);
+	if (got <= 0) {
+		// end of stream reached
+		return 0;
+	}
+	bufLen = bufLen + got;
+	fileLen = bufLen;
+	return got;
+}
+// True when a stream is attached and ftell() succeeds on it.
+bool Buffer::CanSeek() {
+	if (stream == NULL) {
+		return false;
+	}
+	return ftell(stream) != -1;
+}
+int UTF8Buffer::Read() { // reads one UTF-8 encoded character via Buffer::Read() and returns its code point, or EoF
+	int ch;
+	do {
+		ch = Buffer::Read();
+		// until we find a utf8 start (0xxxxxxx or 11xxxxxx)
+	} while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); // skips stray continuation bytes (10xxxxxx)
+	if (ch < 128 || ch == EoF) {
+		// nothing to do, first 127 chars are the same in ascii and utf8
+		// 0xxxxxxx or end of file character
+	} else if ((ch & 0xF0) == 0xF0) { // widest lead byte is tested first because it also matches the narrower masks below
+		// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+		int c1 = ch & 0x07; ch = Buffer::Read();
+		int c2 = ch & 0x3F; ch = Buffer::Read();
+		int c3 = ch & 0x3F; ch = Buffer::Read();
+		int c4 = ch & 0x3F;
+		ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; // 3 + 6 + 6 + 6 payload bits
+	} else if ((ch & 0xE0) == 0xE0) {
+		// 1110xxxx 10xxxxxx 10xxxxxx
+		int c1 = ch & 0x0F; ch = Buffer::Read();
+		int c2 = ch & 0x3F; ch = Buffer::Read();
+		int c3 = ch & 0x3F;
+		ch = (((c1 << 6) | c2) << 6) | c3; // 4 + 6 + 6 payload bits
+	} else if ((ch & 0xC0) == 0xC0) {
+		// 110xxxxx 10xxxxxx
+		int c1 = ch & 0x1F; ch = Buffer::Read();
+		int c2 = ch & 0x3F;
+		ch = (c1 << 6) | c2; // 5 + 6 payload bits
+	}
+	return ch; // NOTE(review): continuation bytes are not validated and an EoF inside a sequence is masked like a data byte
+}
+// Scans the `len` bytes at `buf`; the Buffer constructed here makes its own copy.
+Scanner::Scanner(const char* buf, size_t len) {
+	this->buffer = new Buffer(buf, len);
+	this->Init();
+}
+// Scans the file named `fileName`, opened in binary mode. The Buffer is created
+// with isUserStream = false, so it is allowed to close the stream itself.
+// Prints a message and terminates the process when the file cannot be opened.
+Scanner::Scanner(const wchar_t* fileName) {
+	char *narrowName = coco_string_create_char(fileName);
+	FILE* stream = fopen(narrowName, "rb");
+	if (stream == NULL) {
+		wprintf(L"--- Cannot open file %ls\n", fileName);
+		exit(1);
+	}
+	coco_string_delete(narrowName);
+	buffer = new Buffer(stream, false);
+	Init();
+}
+// Scans an already opened stream. The Buffer is created with
+// isUserStream = true, so Buffer::Close() leaves the stream open.
+Scanner::Scanner(FILE* s) {
+	this->buffer = new Buffer(s, true);
+	this->Init();
+}
+// Frees every block of the token heap, then the token text buffer and the
+// input buffer.
+Scanner::~Scanner() {
+	while (firstHeap != NULL) {
+		// the pointer to the next block is stored right behind the block's payload
+		char* following = *(char**) ((char*) firstHeap + COCO_HEAP_BLOCK_SIZE);
+		free(firstHeap);
+		firstHeap = following;
+	}
+	delete [] tval;
+	delete buffer;
+}
+void Scanner::Init() { // sets up the character-to-state table, keywords, token heap and first lookahead character; called by every constructor
+	EOL    = '\n';
+	eofSym = 0;
+	maxT = 21; // Peek() treats token kinds above this value as pragmas
+	noSym = 21; // kind given to a token when no state accepted it (see case 0 in NextToken)
+	int i;
+	for (i = 65; i <= 90; ++i) start.set(i, 1); // 'A'..'Z' -> state 1
+	for (i = 97; i <= 122; ++i) start.set(i, 2); // 'a'..'z' -> state 2
+	for (i = 49; i <= 57; ++i) start.set(i, 3); // '1'..'9' -> state 3
+	for (i = 10; i <= 10; ++i) start.set(i, 15); // '\n' -> state 15
+	for (i = 13; i <= 13; ++i) start.set(i, 15); // '\r' -> state 15
+	start.set(48, 19); // '0'
+	start.set(34, 20); // '"'
+	start.set(39, 10); // '\''
+	start.set(36, 21); // '$'
+	start.set(60, 24); // '<'
+	start.set(44, 25); // ','
+	start.set(62, 26); // '>'
+	start.set(61, 27); // '='
+	start.set(58, 28); // ':'
+	start.set(91, 29); // '['
+	start.set(93, 30); // ']'
+	start.set(123, 31); // '{'
+	start.set(125, 32); // '}'
+		start.set(Buffer::EoF, -1); // end of input -> state -1 (eofSym)
+	keywords.set(L"struct", 17); // token kinds substituted for these two words in state 2 of NextToken
+	keywords.set(L"import", 20);
+	tvalLength = 128;
+	tval = new wchar_t[tvalLength]; // text of current token
+	// COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
+	heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); // NOTE(review): the malloc result is not checked
+	firstHeap = heap;
+	heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); // slot behind the payload that links to the next block
+	*heapEnd = 0;
+	heapTop = heap;
+	if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) {
+		wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n");
+		exit(1);
+	}
+	pos = -1; line = 1; col = 0; charPos = -1;
+	oldEols = 0;
+	NextCh(); // load the first character into ch
+	if (ch == 0xEF) { // check optional byte order mark for UTF-8
+		NextCh(); int ch1 = ch;
+		NextCh(); int ch2 = ch;
+		if (ch1 != 0xBB || ch2 != 0xBF) {
+			wprintf(L"Illegal byte order mark at start of file");
+			exit(1);
+		}
+		Buffer *oldBuf = buffer;
+		buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; // the new buffer takes over the old one's storage and stream (see Buffer::Buffer(Buffer*))
+		delete oldBuf; oldBuf = NULL;
+		NextCh(); // first character after the byte order mark
+	}
+	pt = tokens = CreateToken(); // first token is a dummy
+}
+// Loads the next input character into `ch` and keeps pos/col/line/charPos up
+// to date. While oldEols is positive, an EOL is delivered instead of reading
+// from the buffer.
+void Scanner::NextCh() {
+	if (oldEols > 0) {
+		ch = EOL;
+		oldEols--;
+		return;
+	}
+	pos = buffer->GetPos();
+	// the buffer delivers unicode characters once UTF-8 has been detected
+	ch = buffer->Read();
+	col++;
+	charPos++;
+	// An isolated '\r' is turned into '\n' so that end-of-line handling is the
+	// same for Windows, Unix and Mac input.
+	const bool loneCarriageReturn = (ch == L'\r') && (buffer->Peek() != L'\n');
+	if (loneCarriageReturn) { ch = EOL; }
+	if (ch == EOL) {
+		line++;
+		col = 0;
+	}
+}
+// Appends the current character to the token text (doubling the text buffer
+// when it is full) and advances to the next character. At end of input nothing
+// is appended and the scanner does not advance.
+void Scanner::AddCh() {
+	if (tlen >= tvalLength) {
+		tvalLength = tvalLength * 2;
+		wchar_t *bigger = new wchar_t[tvalLength];
+		memcpy(bigger, tval, tlen*sizeof(wchar_t));
+		delete [] tval;
+		tval = bigger;
+	}
+	if (ch == Buffer::EoF) {
+		return;
+	}
+	tval[tlen] = ch;
+	tlen++;
+	NextCh();
+}
+bool Scanner::Comment0() { // tries to skip a "//" comment starting at the current '/'; true when one was consumed through its line end
+	int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; // saved so the scanner can be rewound if this is not a comment
+	NextCh();
+	if (ch == L'/') {
+		NextCh();
+		for(;;) {
+			if (ch == 10) { // line feed ends the comment
+				level--;
+				if (level == 0) { oldEols = line - line0; NextCh(); return true; } // oldEols makes NextCh() deliver the line breaks that were crossed
+				NextCh();
+			} else if (ch == buffer->EoF) return false; // input ended before the line end
+			else NextCh();
+		}
+	} else {
+		buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; // not a comment: rewind to the first '/'
+	}
+	return false;
+}
+bool Scanner::Comment1() { // tries to skip a "/* ... */" comment, which may nest; true when the outermost one was closed
+	int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; // saved so the scanner can be rewound if this is not a comment
+	NextCh();
+	if (ch == L'*') {
+		NextCh();
+		for(;;) {
+			if (ch == L'*') {
+				NextCh();
+				if (ch == L'/') { // "*/" closes one nesting level
+					level--;
+					if (level == 0) { oldEols = line - line0; NextCh(); return true; } // oldEols makes NextCh() deliver the line breaks that were crossed
+					NextCh();
+				}
+			} else if (ch == L'/') {
+				NextCh();
+				if (ch == L'*') { // "/*" opens a nested comment
+					level++; NextCh();
+				}
+			} else if (ch == buffer->EoF) return false; // input ended inside the comment
+			else NextCh();
+		}
+	} else {
+		buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; // not a comment: rewind to the first '/'
+	}
+	return false;
+}
+void Scanner::CreateHeapBlock() { // frees the heap blocks in front of the one holding `tokens`, then appends a new block and makes it current
+	void* newHeap;
+	char* cur = (char*) firstHeap;
+	while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { // loop while the current token is not inside block cur
+		cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); // follow the link stored behind the block's payload
+		free(firstHeap);
+		firstHeap = cur;
+	}
+	// COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block
+	newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); // NOTE(review): the malloc result is not checked
+	*heapEnd = newHeap; // link the previous last block to the new one
+	heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE);
+	*heapEnd = 0; // the new block is the last in the chain
+	heap = newHeap;
+	heapTop = heap;
+}
+// Carves a Token out of the token heap, starting a new heap block when the
+// current one cannot hold it. Only `val` and `next` are initialised; no Token
+// constructor runs on this storage.
+Token* Scanner::CreateToken() {
+	char* const wouldEnd = (char*) heapTop + (int) sizeof(Token);
+	if (wouldEnd >= (char*) heapEnd) {
+		CreateHeapBlock();
+	}
+	Token *fresh = (Token*) heapTop;
+	heapTop = (void*) ((char*) heapTop + sizeof(Token));
+	fresh->val = NULL;
+	fresh->next = NULL;
+	return fresh;
+}
+// Copies the current token text (tval, tlen characters) into the token heap,
+// zero-terminates it and stores the address in t->val. Terminates the process
+// when the text cannot fit into a single heap block.
+void Scanner::AppendVal(Token *t) {
+	const int bytesNeeded = (tlen + 1) * sizeof(wchar_t);
+	const bool fits = ((char*) heapTop + bytesNeeded) < (char*) heapEnd;
+	if (!fits) {
+		if (bytesNeeded > COCO_HEAP_BLOCK_SIZE) {
+			wprintf(L"--- Too long token value\n");
+			exit(1);
+		}
+		CreateHeapBlock();
+	}
+	wchar_t* dest = (wchar_t*) heapTop;
+	heapTop = (void*) ((char*) heapTop + bytesNeeded);
+	wcsncpy(dest, tval, tlen);
+	dest[tlen] = L'\0';
+	t->val = dest;
+}
+Token* Scanner::NextToken() { // scans and returns the next token; a goto-driven state machine whose entry state comes from the `start` table filled in Init()
+	while (ch == ' ' ||
+			(ch >= 9 && ch <= 10) || ch == 13
+	) NextCh(); // skip blanks, tabs, line feeds and carriage returns
+	if ((ch == L'/' && Comment0()) || (ch == L'/' && Comment1())) return NextToken(); // a comment was skipped: start over behind it
+	int recKind = noSym; // kind of the longest token recognised so far
+	int recEnd = pos; // input position just behind that token
+	t = CreateToken();
+	t->pos = pos; t->col = col; t->line = line; t->charPos = charPos;
+	int state = start.state(ch);
+	tlen = 0; AddCh(); // the first character is consumed before dispatching on its state
+	switch (state) {
+		case -1: { t->kind = eofSym; break; } // NextCh already done
+		case 0: { // no transition possible: fall back to the last recognised token, if any
+			case_0:
+			if (recKind != noSym) {
+				tlen = recEnd - t->pos;
+				SetScannerBehindT(); // rewind the input to just behind that token
+			}
+			t->kind = recKind; break;
+		} // NextCh already done
+		case 1: // kind 1 (_pascalcase in Parser.h): upper-case letter followed by letters and digits
+			case_1:
+			recEnd = pos; recKind = 1;
+			if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_1;}
+			else {t->kind = 1; break;}
+		case 2: // kind 2 (_camelcase), unless the text is a keyword registered in Init()
+			case_2:
+			recEnd = pos; recKind = 2;
+			if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_2;}
+			else {t->kind = 2; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}
+		case 3: // kind 3 (_number): integer part
+			case_3:
+			recEnd = pos; recKind = 3;
+			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
+			else if (ch == L'.') {AddCh(); goto case_4;}
+			else {t->kind = 3; break;}
+		case 4: // just after the '.', at least one digit must follow
+			case_4:
+			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_5;}
+			else {goto case_0;}
+		case 5: // kind 3 (_number): fraction digits
+			case_5:
+			recEnd = pos; recKind = 3;
+			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_5;}
+			else {t->kind = 3; break;}
+		case 6: // just after "0x", at least one hex digit (0-9, a-f) must follow
+			case_6:
+			if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_7;}
+			else {goto case_0;}
+		case 7: // kind 4 (_hexinteger)
+			case_7:
+			recEnd = pos; recKind = 4;
+			if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_7;}
+			else {t->kind = 4; break;}
+		case 8: // kind 5 (_string): closing '"' was seen
+			case_8:
+			{t->kind = 5; break;}
+		case 9: // kind 6 (_badString): a line break was hit inside a string
+			case_9:
+			{t->kind = 6; break;}
+		case 10: // just after the opening '\'' of a character literal
+			if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'&') || (ch >= L'(' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_11;}
+			else if (ch == 92) {AddCh(); goto case_12;} // 92 is the backslash
+			else {goto case_0;}
+		case 11: // one plain character read, expect the closing '\''
+			case_11:
+			if (ch == 39) {AddCh(); goto case_14;}
+			else {goto case_0;}
+		case 12: // just after a backslash inside a character literal
+			case_12:
+			if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_13;}
+			else {goto case_0;}
+		case 13: // escape body: further 0-9 / a-f characters, then the closing '\''
+			case_13:
+			if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_13;}
+			else if (ch == 39) {AddCh(); goto case_14;}
+			else {goto case_0;}
+		case 14: // kind 7 (_char)
+			case_14:
+			{t->kind = 7; break;}
+		case 15: // kind 8 (_endOfLine). NOTE(review): characters 10 and 13 are consumed by the whitespace loop above, so this state looks unreachable from the start table -- confirm
+			{t->kind = 8; break;}
+		case 16: // kind 9 (_customTokenTypeVariable): '$', an upper-case letter, then letters and digits
+			case_16:
+			recEnd = pos; recKind = 9;
+			if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_16;}
+			else {t->kind = 9; break;}
+		case 17: // kind 22 (_ddtSym): lower-case letters and digits
+			case_17:
+			recEnd = pos; recKind = 22;
+			if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_17;}
+			else {t->kind = 22; break;}
+		case 18: // kind 23 (_optionSym): text following the '='
+			case_18:
+			recEnd = pos; recKind = 23;
+			if ((ch >= L'-' && ch <= L'.') || (ch >= L'0' && ch <= L':') || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_18;}
+			else {t->kind = 23; break;}
+		case 19: // a leading '0': plain number, fraction, or the start of a hex literal
+			recEnd = pos; recKind = 3;
+			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_3;}
+			else if (ch == L'.') {AddCh(); goto case_4;}
+			else if (ch == L'x') {AddCh(); goto case_6;}
+			else {t->kind = 3; break;}
+		case 20: // inside a double-quoted string
+			case_20:
+			if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'!') || (ch >= L'#' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_20;}
+			else if (ch == 10 || ch == 13) {AddCh(); goto case_9;}
+			else if (ch == L'"') {AddCh(); goto case_8;}
+			else if (ch == 92) {AddCh(); goto case_22;} // backslash escape
+			else {goto case_0;}
+		case 21: // just after '$'
+			recEnd = pos; recKind = 22;
+			if ((ch >= L'A' && ch <= L'Z')) {AddCh(); goto case_16;}
+			else if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_17;}
+			else if ((ch >= L'a' && ch <= L'z')) {AddCh(); goto case_23;}
+			else {t->kind = 22; break;}
+		case 22: // just after a backslash inside a string: accept one printable ASCII character
+			case_22:
+			if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_20;}
+			else {goto case_0;}
+		case 23: // '$' followed by lower-case letters; an '=' switches to the option form (state 18)
+			case_23:
+			recEnd = pos; recKind = 22;
+			if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_17;}
+			else if ((ch >= L'a' && ch <= L'z')) {AddCh(); goto case_23;}
+			else if (ch == L'=') {AddCh(); goto case_18;}
+			else {t->kind = 22; break;}
+		case 24: // '<'
+			{t->kind = 10; break;}
+		case 25: // ','
+			{t->kind = 11; break;}
+		case 26: // '>'
+			{t->kind = 12; break;}
+		case 27: // '='
+			{t->kind = 13; break;}
+		case 28: // ':'
+			{t->kind = 14; break;}
+		case 29: // '['
+			{t->kind = 15; break;}
+		case 30: // ']'
+			{t->kind = 16; break;}
+		case 31: // '{'
+			{t->kind = 18; break;}
+		case 32: // '}'
+			{t->kind = 19; break;}
+	}
+	AppendVal(t); // copy the collected text into the token heap
+	return t;
+}
+// Rewinds the input to the start of token `t`, restores the position counters
+// recorded in it, and then re-reads tlen characters so that the scanner stands
+// just behind the token text.
+void Scanner::SetScannerBehindT() {
+	buffer->SetPos(t->pos);
+	NextCh();
+	line = t->line;
+	col = t->col;
+	charPos = t->charPos;
+	int remaining = tlen;
+	while (remaining > 0) {
+		NextCh();
+		--remaining;
+	}
+}
+// Returns the next token: one already produced during peeking if available,
+// otherwise a freshly scanned one. The peek position is moved to the same token.
+Token* Scanner::Scan() {
+	Token* upcoming = tokens->next;
+	if (upcoming == NULL) {
+		// `tokens` must still refer to the previous token while NextToken() runs
+		upcoming = NextToken();
+	}
+	tokens = upcoming;
+	pt = upcoming;
+	return upcoming;
+}
+// Advances the peek position to the next token that is not a pragma (pragmas
+// have a kind above maxT) and returns it, scanning new tokens as needed.
+Token* Scanner::Peek() {
+	for (;;) {
+		if (pt->next == NULL) {
+			pt->next = NextToken();
+		}
+		pt = pt->next;
+		if (pt->kind <= maxT) {
+			break;
+		}
+	}
+	return pt;
+}
+// Makes the next Peek() start from the current scan position again.
+void Scanner::ResetPeek() {
+	this->pt = this->tokens;
+}
+} // namespace