sommark 3.1.0 → 3.2.0

package/core/lexer.js CHANGED
@@ -175,9 +175,8 @@ function lexer(src) {
  if (src && typeof src === "string") {
  const tokens = [];
  let scope_state = false;
- let line = 1;
- let start = 1;
- let end = 0;
+ let line = 0;
+ let character = 0;
  let depth_stack = [];
  let context = "",
  temp_str = "",
@@ -194,23 +193,36 @@ function lexer(src) {
  }

  function addToken(type, value) {
- tokens.push({ type, value, line, start, end, depth: depth_stack.length });
+ const startPos = { line, character };
+ // Update position based on value length and newlines
+ const newlines = (value.match(/\n/g) || []).length;
+ if (newlines > 0) {
+ line += newlines;
+ const parts = value.split("\n");
+ character = parts[parts.length - 1].length;
+ } else {
+ character += value.length;
+ }
+ const endPos = { line, character };
+ tokens.push({
+ type,
+ value,
+ range: { start: startPos, end: endPos },
+ depth: depth_stack.length
+ });
  }

- const updateMetadata = text => {
- const newlines = updateNewLine(text) || 0;
+ // Helper to advance position without adding a token (e.g., for whitespace/newlines that don't emit tokens)
+ function advance(text) {
+ const newlines = (text.match(/\n/g) || []).length;
  if (newlines > 0) {
- const lines = text.split("\n");
- const lastLineLength = lines[lines.length - 1].length;
- start = end + 1;
- end = lastLineLength;
  line += newlines;
+ const parts = text.split("\n");
+ character = parts[parts.length - 1].length;
  } else {
- const cols = updateColumn(end, text.length);
- start = cols.start;
- end = cols.end;
+ character += text.length;
  }
- };
+ }

  for (let i = 0; i < src.length; i++) {
  let current_char = src[i];
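
The two helpers above replace the old `updateMetadata`/`updateColumn` bookkeeping: `addToken` now computes a zero-based `{ line, character }` range for every token it emits, and `advance` moves the cursor for characters that emit no token. A minimal standalone sketch of the position arithmetic (illustrative only, not the package's code):

function measure(value, pos) {
  // pos is { line, character }, zero-based, mirroring the new lexer state
  const start = { ...pos };
  const newlines = (value.match(/\n/g) || []).length;
  if (newlines > 0) {
    pos.line += newlines;
    pos.character = value.split("\n").pop().length;
  } else {
    pos.character += value.length;
  }
  return { start, end: { ...pos } };
}

const pos = { line: 0, character: 0 };
measure("[note]", pos); // => { start: { line: 0, character: 0 }, end: { line: 0, character: 6 } }
measure("a\nbc", pos);  // crosses a newline => end is { line: 1, character: 2 }
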
@@ -218,8 +230,6 @@ function lexer(src) {
  // Token: Open Bracket //
  // ========================================================================== //
  if (current_char === "[" && !scope_state && previous_value !== "(") {
- // Update Metadata
- updateMetadata(current_char);
  // i + 1 -> skip current character
  temp_str = concatChar(src, i + 1, ["]"]);
  if (temp_str && temp_str.length > 0) {
@@ -239,8 +249,6 @@ function lexer(src) {
  // Token: Equal Sign //
  // ========================================================================== //
  else if (current_char === "=" && !scope_state) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.EQUAL, current_char);
  previous_value = current_char;
  }
@@ -248,8 +256,6 @@ function lexer(src) {
  // Token: Close Bracket //
  // ========================================================================== //
  else if (current_char === "]" && !scope_state) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.CLOSE_BRACKET, current_char);
  if (previous_value === end_keyword) {
  depth_stack.pop();
@@ -260,8 +266,6 @@ function lexer(src) {
  // Token: Open Parenthesis '(' //
  // ========================================================================== //
  else if (current_char === "(" && !scope_state) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.OPEN_PAREN, current_char);
  if (previous_value !== "->") {
  previous_value = current_char;
@@ -273,8 +277,6 @@ function lexer(src) {
  else if (current_char === "-" && peek(src, i, 1) === ">") {
  temp_str = current_char + peek(src, i, 1);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  addToken(TOKEN_TYPES.THIN_ARROW, temp_str);
  previous_value = temp_str;
  }
@@ -282,8 +284,6 @@ function lexer(src) {
  // Token: Close Parenthesis ')' //
  // ========================================================================== //
  else if (current_char === ")" && !scope_state) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.CLOSE_PAREN, current_char);
  previous_value = current_char;
  }
@@ -297,8 +297,6 @@ function lexer(src) {
  ) {
  temp_str = current_char + peek(src, i, 1);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  addToken(TOKEN_TYPES.OPEN_AT, temp_str);
  // is next token end keyword?
  if (isAtBlockEnd(src, i - 1)) {
@@ -313,8 +311,6 @@ function lexer(src) {
  else if (current_char === "_" && peek(src, i, 1) === "@") {
  temp_str = current_char + peek(src, i, 1);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  addToken(TOKEN_TYPES.CLOSE_AT, temp_str);
  switch (previous_value) {
  case at_id:
@@ -341,8 +337,6 @@ function lexer(src) {
  previous_value === INLINECOLON) &&
  !scope_state
  ) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.COLON, current_char);
  switch (previous_value) {
  case block_id_2:
@@ -371,8 +365,6 @@ function lexer(src) {
  previous_value === ATBLOCKCOMMA ||
  previous_value === INLINECOMMA)
  ) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.COMMA, current_char);
  switch (previous_value) {
  case "=":
@@ -398,8 +390,6 @@ function lexer(src) {
  (current_char === ";" && previous_value === ";") ||
  (current_char === ";" && previous_value === ATBLOCKCOMMA)
  ) {
- // Update Metadata
- updateMetadata(current_char);
  addToken(TOKEN_TYPES.SEMICOLON, current_char);
  scope_state = true;
  previous_value = current_char;
@@ -410,21 +400,17 @@ function lexer(src) {
  else if (current_char === "\\") {
  temp_str = concatEscape(src, i);
  i += temp_str.length - 1;
- updateMetadata(temp_str);
  temp_str = temp_str.trim();
  if (temp_str && temp_str.length > 0) {
- // Add Token
  addToken(TOKEN_TYPES.ESCAPE, temp_str);
  }
  }
  // ========================================================================== //
- // Count Newlines //
+ // Count Newlines and Whitespace (No Tokens) //
  // ========================================================================== //
  else if (current_char === "\n") {
  if (!scope_state) {
- line++;
- start = 1;
- end = 0;
+ advance(current_char);
  continue;
  }
  }
@@ -438,8 +424,6 @@ function lexer(src) {
  if (previous_value === "[" && !scope_state) {
  temp_str = concatChar(src, i, ["=", "]"]);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  const trimmedStr = temp_str.trim();
  if (trimmedStr !== end_keyword) {
@@ -464,8 +448,6 @@ function lexer(src) {
  temp_str = concatChar(src, i, ["]", "\\", ",", ":"]);
  i += temp_str.length - 1;
  const nextToken = peek(src, i, 1);
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  // Add token
  switch (nextToken) {
@@ -489,8 +471,6 @@ function lexer(src) {
  temp_str = concatChar(src, i, ["(", ")", ":"]);
  i += temp_str.length - 1;
  const nextToken = peek(src, i, 1);
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  // Add Token
  switch (nextToken) {
@@ -521,8 +501,6 @@ function lexer(src) {
  ) {
  temp_str = concatChar(src, i, [")", "\\", ",", previous_value === INLINECOLON ? ":" : null]);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  // Add Token
  addToken(TOKEN_TYPES.VALUE, temp_str);
@@ -536,8 +514,6 @@ function lexer(src) {
  else if (previous_value === "@_") {
  temp_str = concatChar(src, i, ["_", ":"]);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  const trimmedStr = temp_str.trim();
  if (trimmedStr !== end_keyword) {
@@ -555,8 +531,6 @@ function lexer(src) {
  temp_str = concatChar(src, i, [";", "\\", ",", ":"]);
  i += temp_str.length - 1;
  const nextToken = peek(src, i, 1);
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  switch (nextToken) {
  case ":":
@@ -578,8 +552,6 @@ function lexer(src) {
  else if ((previous_value === block_end && !scope_state) || previous_value === at_end) {
  temp_str = concatChar(src, i, ["]", "_"]);
  i += temp_str.length - 1;
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  addToken(TOKEN_TYPES.END_KEYWORD, temp_str);
  // Update Previous Value
@@ -592,8 +564,6 @@ function lexer(src) {
  // ========================================================================== //
  else if (current_char === "#") {
  temp_str = concatChar(src, i, ["\n"]);
- // Update Metadata
- updateMetadata(temp_str);
  if (temp_str.trim()) {
  i += temp_str.length - 1;
  addToken(TOKEN_TYPES.COMMENT, temp_str);
@@ -615,8 +585,6 @@ function lexer(src) {
  [")", previous_value === inline_value]
  ]);
  i += context.length - 1;
- // Update Metadata
- updateMetadata(context);
  if (context.trim()) {
  addToken(TOKEN_TYPES.TEXT, context);
  }
@@ -625,6 +593,16 @@ function lexer(src) {
  context = "";
  temp_str = "";
  }
+
+ // Ensure EOF token
+ const eofPos = { line, character };
+ tokens.push({
+ type: TOKEN_TYPES.EOF,
+ value: "",
+ range: { start: eofPos, end: eofPos },
+ depth: depth_stack.length
+ });
+
  return tokens;
  } else {
  lexerError([
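
Taken together, the lexer changes replace the flat `line`/`start`/`end` token fields with a nested `range`, switch to zero-based positions, and guarantee a trailing `EOF` token. A 3.2.0 token should therefore look roughly like this (shape inferred from the hunks above; the concrete values are illustrative):

// Illustrative token shape, inferred from the diff
{
  type: "IDENTIFIER",
  value: "note",
  range: {
    start: { line: 0, character: 1 },  // zero-based line and column
    end: { line: 0, character: 5 }
  },
  depth: 1
}

The synthetic `EOF` token closes the stream with an empty `value` and a zero-width range (`start` equals `end`) at the final cursor position, giving consumers a stable end-of-input anchor.
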
package/core/parser.js CHANGED
@@ -38,7 +38,11 @@ function makeBlockNode() {
  id: "",
  args: [],
  body: [],
- depth: 0
+ depth: 0,
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ }
  };
  }

@@ -46,7 +50,11 @@ function makeTextNode() {
  return {
  type: TEXT,
  text: "",
- depth: 0
+ depth: 0,
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ }
  };
  }

@@ -54,7 +62,11 @@ function makeCommentNode() {
  return {
  type: COMMENT,
  text: "",
- depth: 0
+ depth: 0,
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ }
  };
  }

@@ -64,7 +76,11 @@ function makeInlineNode() {
  value: "",
  id: "",
  args: [],
- depth: 0
+ depth: 0,
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ }
  };
  }

@@ -74,30 +90,34 @@ function makeAtBlockNode() {
  id: "",
  args: [],
  content: "",
- depth: 0
+ depth: 0,
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ }
  };
  }

  let end_stack = [];
  let tokens_stack = [];
- let line = 1,
- start = 1,
- end = 1,
+ let range = {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ },
  value = "";

  const fallback = {
  value: "Unknown",
- line: "Unknown",
- start: "Unknown",
- end: "Unknown",
+ range: {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ },
  tokens_stack: ["--Empty--"]
  };
  const updateData = (tokens, i) => {
  if (tokens[i]) {
  tokens_stack.push(tokens[i].value);
- line = tokens[i].line;
- start = tokens[i].start;
- end = tokens[i].end;
+ range = tokens[i].range;
  value = tokens[i].value;
  }
  };
@@ -130,11 +150,11 @@ const errorMessage = (tokens, i, expectedValue, behindValue, frontText) => {

  return [
  `<$blue:{line}$><$red:Here where error occurred:$>{N}${lineContent}{N}${pointerPadding}<$yellow:^$>{N}{N}`,
- `<$red:${frontText ? frontText : "Expected token"}$> <$blue:'${expectedValue}'$> ${behindValue ? "after <$blue:'" + behindValue + "'$>" : ""} at line <$yellow:${line}$>,`,
- ` from column <$yellow: ${start}$> to <$yellow: ${end}$>`,
+ `<$red:${frontText ? frontText : "Expected token"}$> <$blue:'${expectedValue}'$> ${behindValue ? "after <$blue:'" + behindValue + "'$>" : ""} at line <$yellow:${current.range.start.line + 1}$>,`,
+ ` from column <$yellow: ${current.range.start.character}$> to <$yellow: ${current.range.end.character}$>`,
  `{N}<$yellow:Received:$> <$blue:'${value === "\n" ? "\\n' (newline)" : value}'$>`,
- ` at line <$yellow:${current.line}$>,`,
- ` from column <$yellow: ${current.start}$> to <$yellow: ${current.end}$>{N}`,
+ ` at line <$yellow:${current.range.start.line + 1}$>,`,
+ ` from column <$yellow: ${current.range.start.character}$> to <$yellow: ${current.range.end.character}$>{N}`,
  "<$blue:{line}$>"
  ];
  };
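
Note the `+ 1` applied to `range.start.line` in the rewritten messages: positions are now stored zero-based, so the error formatter converts lines back to one-based for display while columns are printed as stored. A hypothetical helper (not in the package) capturing the same convention:

// Hypothetical: format a zero-based range the way the new error messages do
function formatRange(range) {
  return `line ${range.start.line + 1}, ` +
    `from column ${range.start.character} to ${range.end.character}`;
}

formatRange({ start: { line: 0, character: 4 }, end: { line: 0, character: 9 } });
// => "line 1, from column 4 to 9"
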
@@ -233,21 +253,21 @@ function parseSemiColon(tokens, i, afterChar = "") {
  // ========================================================================== //
  function parseBlock(tokens, i) {
  const blockNode = makeBlockNode();
+ const openBracketToken = current_token(tokens, i);
  // ========================================================================== //
  // consume '[' //
  // ========================================================================== //
  i++;
  updateData(tokens, i);
- if (!current_token(tokens, i) || (current_token(tokens, i) && current_token(tokens, i).type !== TOKEN_TYPES.IDENTIFIER)) {
- parserError(errorMessage(tokens, i, block_id, "["));
- }
- const id = current_token(tokens, i).value;
+ const idToken = current_token(tokens, i);
+ const id = idToken.value;
  if (id.trim() === end_keyword) {
  parserError(errorMessage(tokens, i, id, "", `'${id.trim()}' is a reserved keyword and cannot be used as an identifier.`));
  }
  blockNode.id = id.trim();
  validateName(blockNode.id);
- blockNode.depth = current_token(tokens, i).depth;
+ blockNode.depth = idToken.depth;
+ blockNode.range.start = openBracketToken.range.start;
  end_stack.push(id);
  // ========================================================================== //
  // consume Block Identifier //
@@ -403,8 +423,10 @@ function parseBlock(tokens, i) {
  // ========================================================================== //
  // consume ']' //
  // ========================================================================== //
+ const closeBracketToken = current_token(tokens, i);
  i++;
  updateData(tokens, i);
+ blockNode.range.end = closeBracketToken.range.end;
  break;
  } else {
  const [childNode, nextIndex] = parseNode(tokens, i);
@@ -423,6 +445,8 @@ function parseBlock(tokens, i) {
  // ========================================================================== //
  function parseInline(tokens, i) {
  const inlineNode = makeInlineNode();
+ const openParenToken = current_token(tokens, i);
+ inlineNode.range.start = openParenToken.range.start;
  // ========================================================================== //
  // consume '(' //
  // ========================================================================== //
@@ -569,8 +593,10 @@ function parseInline(tokens, i) {
  // ========================================================================== //
  // consume ')' //
  // ========================================================================== //
+ const finalParenToken = current_token(tokens, i);
  i++;
  updateData(tokens, i);
+ inlineNode.range.end = finalParenToken.range.end;
  tokens_stack.length = 0;
  return [inlineNode, i];
  }
@@ -579,7 +605,9 @@ function parseInline(tokens, i) {
  // ========================================================================== //
  function parseText(tokens, i, options = {}) {
  const textNode = makeTextNode();
- textNode.depth = current_token(tokens, i).depth;
+ const startToken = current_token(tokens, i);
+ textNode.range.start = startToken.range.start;
+ textNode.depth = startToken.depth;
  const { selectiveUnescape = false } = options;

  while (i < tokens.length) {
@@ -604,6 +632,7 @@ function parseText(tokens, i, options = {}) {
  } else {
  break;
  }
+ textNode.range.end = current_token(tokens, i - 1).range.end;
  }
  return [textNode, i];
  }
@@ -612,6 +641,8 @@ function parseText(tokens, i, options = {}) {
  // ========================================================================== //
  function parseAtBlock(tokens, i) {
  const atBlockNode = makeAtBlockNode();
+ const openAtToken = current_token(tokens, i);
+ atBlockNode.range.start = openAtToken.range.start;
  // ========================================================================== //
  // consume '@_' //
  // ========================================================================== //
@@ -754,8 +785,10 @@ function parseAtBlock(tokens, i) {
  // ========================================================================== //
  // consume '_@' //
  // ========================================================================== //
+ const closeAtToken = current_token(tokens, i);
  i++;
  updateData(tokens, i);
+ atBlockNode.range.end = closeAtToken.range.end;
  tokens_stack.length = 0;
  return [atBlockNode, i];
  }
@@ -764,9 +797,11 @@ function parseAtBlock(tokens, i) {
  // ========================================================================== //
  function parseCommentNode(tokens, i) {
  const commentNode = makeCommentNode();
- if (current_token(tokens, i) && current_token(tokens, i).type === TOKEN_TYPES.COMMENT) {
- commentNode.text = current_token(tokens, i).value;
- commentNode.depth = current_token(tokens, i).depth;
+ const token = current_token(tokens, i);
+ if (token && token.type === TOKEN_TYPES.COMMENT) {
+ commentNode.text = token.value;
+ commentNode.depth = token.depth;
+ commentNode.range = token.range;
  }
  // ========================================================================== //
  // consume Comment '#' //
@@ -825,9 +860,10 @@ function parseNode(tokens, i) {
  function parser(tokens) {
  end_stack = [];
  tokens_stack = [];
- line = 1;
- start = 1;
- end = 1;
+ range = {
+ start: { line: 0, character: 0 },
+ end: { line: 0, character: 0 }
+ };
  value = "";
  let ast = [];
  let i = 0;
@@ -16,7 +16,8 @@ const TOKEN_TYPES = {
  COMMA: "COMMA",
  SEMICOLON: "SEMICOLON",
  COMMENT: "COMMENT",
- ESCAPE: "ESCAPE"
+ ESCAPE: "ESCAPE",
+ EOF: "EOF"
  };

  export default TOKEN_TYPES;
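
On the parser side, every node factory now seeds a zeroed `range`, and each parse function stamps `range.start` from its opening token and `range.end` from its closing token. A hedged sketch of a resulting block node (the node-type constant and positions are illustrative, not taken from the package):

// Illustrative only — the range spans from the opening '[' to the closing ']'
{
  type: "BLOCK",          // assumed node-type constant
  id: "note",
  args: [],
  body: [ /* child nodes, each carrying its own range */ ],
  depth: 0,
  range: {
    start: { line: 0, character: 0 },
    end: { line: 2, character: 5 }
  }
}

Together with the new `EOF` token type, this gives downstream tools precise source spans for every token and AST node.
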
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "sommark",
- "version": "3.1.0",
+ "version": "3.2.0",
  "description": "SomMark is a declarative, extensible markup language for structured content that can be converted to HTML, Markdown, MDX, JSON, and more.",
  "main": "index.js",
  "directories": {