npm - @atomic-ehr/fhirpath - Versions diffs - 0.0.1-canary.0c6931e.20250727185306 - Mend

@atomic-ehr/fhirpath 0.0.1-canary.0c6931e.20250727185306

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

package/README.md +473 -0
package/dist/index.d.ts +462 -0
package/dist/index.js +10307 -0
package/dist/index.js.map +1 -0
package/package.json +58 -0
package/src/analyzer/analyzer.ts +499 -0
package/src/analyzer/model-provider.ts +244 -0
package/src/analyzer/schemas/index.ts +2 -0
package/src/analyzer/schemas/types.ts +40 -0
package/src/analyzer/types.ts +142 -0
package/src/api/builder.ts +157 -0
package/src/api/errors.ts +145 -0
package/src/api/expression.ts +156 -0
package/src/api/index.ts +122 -0
package/src/api/inspect.ts +99 -0
package/src/api/registry.ts +128 -0
package/src/api/types.ts +210 -0
package/src/compiler/compiler.ts +546 -0
package/src/compiler/index.ts +2 -0
package/src/compiler/prototype-context-adapter.ts +99 -0
package/src/compiler/types.ts +24 -0
package/src/index.ts +107 -0
package/src/interpreter/README.md +78 -0
package/src/interpreter/interpreter.ts +475 -0
package/src/interpreter/types.ts +108 -0
package/src/lexer/char-tables.ts +37 -0
package/src/lexer/errors.ts +31 -0
package/src/lexer/index.ts +5 -0
package/src/lexer/lexer.ts +745 -0
package/src/lexer/token.ts +104 -0
package/src/lexer2/index.md +232 -0
package/src/lexer2/index.perf.test.ts +68 -0
package/src/lexer2/index.test.ts +549 -0
package/src/lexer2/index.ts +1251 -0
package/src/lexer2/notes.md +173 -0
package/src/lexer2/optimization-summary.md +718 -0
package/src/parser/ast-factory.ts +220 -0
package/src/parser/ast.ts +144 -0
package/src/parser/collection-parser.ts +89 -0
package/src/parser/diagnostic-messages.ts +216 -0
package/src/parser/diagnostics.ts +85 -0
package/src/parser/error-reporter.ts +230 -0
package/src/parser/index.ts +3 -0
package/src/parser/literal-parser.ts +103 -0
package/src/parser/parse-error.ts +16 -0
package/src/parser/parser-error-factory.ts +141 -0
package/src/parser/parser-state.ts +134 -0
package/src/parser/parser.ts +1272 -0
package/src/parser/pprint.ts +169 -0
package/src/parser/precedence-manager.ts +64 -0
package/src/parser/source-mapper.ts +248 -0
package/src/parser/special-constructs.ts +142 -0
package/src/parser/token-navigator.ts +110 -0
package/src/parser/types.ts +60 -0
package/src/parser2/index.md +177 -0
package/src/parser2/index.perf.test.ts +184 -0
package/src/parser2/index.test.ts +305 -0
package/src/parser2/index.ts +578 -0
package/src/parser2/optimization-summary.md +176 -0
package/src/registry/default-analyzers.ts +257 -0
package/src/registry/default-compilers.ts +31 -0
package/src/registry/index.ts +96 -0
package/src/registry/operations/arithmetic.ts +506 -0
package/src/registry/operations/collection.ts +425 -0
package/src/registry/operations/comparison.ts +432 -0
package/src/registry/operations/existence.ts +703 -0
package/src/registry/operations/filtering.ts +358 -0
package/src/registry/operations/literals.ts +341 -0
package/src/registry/operations/logical.ts +439 -0
package/src/registry/operations/math.ts +128 -0
package/src/registry/operations/membership.ts +132 -0
package/src/registry/operations/navigation.ts +52 -0
package/src/registry/operations/string.ts +507 -0
package/src/registry/operations/subsetting.ts +174 -0
package/src/registry/operations/type-checking.ts +162 -0
package/src/registry/operations/type-conversion.ts +404 -0
package/src/registry/operations/type-operators.ts +308 -0
package/src/registry/operations/utility.ts +644 -0
package/src/registry/registry.ts +146 -0
package/src/registry/types.ts +161 -0
package/src/registry/utils/evaluation-helpers.ts +93 -0
package/src/registry/utils/index.ts +3 -0
package/src/registry/utils/type-system.ts +173 -0
package/src/runtime/context.ts +158 -0
package/src/runtime/debug-context.ts +135 -0

package/src/lexer/token.ts ADDED Viewed

@@ -0,0 +1,104 @@
+export enum TokenType { // String-valued token kinds; every member's value equals its own name
+  // Literals
+  LITERAL = 'LITERAL',            // Generic literal token for registry-based literals
+  NULL = 'NULL',                  // {} (nullLiteral in grammar)
+  TRUE = 'TRUE',                  // true
+  FALSE = 'FALSE',                // false
+  STRING = 'STRING',              // 'string value'
+  NUMBER = 'NUMBER',              // 123, 45.67, 0123 (allows leading zeros)
+  DATE = 'DATE',                  // @2024, @2024-01, @2024-01-15
+  DATETIME = 'DATETIME',          // @2024-01-15T10:30:00Z
+  TIME = 'TIME',                  // @T14:30:00
+  // Identifiers
+  IDENTIFIER = 'IDENTIFIER',      // [A-Za-z_][A-Za-z0-9_]*
+  DELIMITED_IDENTIFIER = 'DELIMITED_IDENTIFIER', // `identifier`
+  // Special variables
+  THIS = 'THIS',                  // $this
+  INDEX = 'INDEX',                // $index
+  TOTAL = 'TOTAL',                // $total
+  // Environment variables
+  ENV_VAR = 'ENV_VAR',           // %context, %`vs-name`
+  // Operators (by precedence)
+  DOT = 'DOT',                   // .
+  LBRACKET = 'LBRACKET',         // [
+  RBRACKET = 'RBRACKET',         // ]
+  LPAREN = 'LPAREN',             // (
+  RPAREN = 'RPAREN',             // )
+  // Arithmetic
+  PLUS = 'PLUS',                 // +
+  MINUS = 'MINUS',               // -
+  STAR = 'STAR',                 // *
+  SLASH = 'SLASH',               // /
+  DIV = 'DIV',                   // div
+  MOD = 'MOD',                   // mod
+  CONCAT = 'CONCAT',             // &
+  // Type operators
+  IS = 'IS',                     // is
+  AS = 'AS',                     // as
+  // Union
+  PIPE = 'PIPE',                 // |
+  // Comparison
+  LT = 'LT',                     // <
+  LTE = 'LTE',                   // <=
+  GT = 'GT',                     // >
+  GTE = 'GTE',                   // >=
+  EQ = 'EQ',                     // =
+  NEQ = 'NEQ',                   // !=
+  EQUIV = 'EQUIV',               // ~
+  NEQUIV = 'NEQUIV',             // !~
+  // Membership
+  IN = 'IN',                     // in
+  CONTAINS = 'CONTAINS',         // contains
+  // Boolean
+  AND = 'AND',                   // and
+  OR = 'OR',                     // or
+  XOR = 'XOR',                   // xor
+  IMPLIES = 'IMPLIES',           // implies
+  NOT = 'NOT',                   // not
+  // Collection
+  LBRACE = 'LBRACE',             // {
+  RBRACE = 'RBRACE',             // }
+  // Other
+  COMMA = 'COMMA',               // ,
+  EOF = 'EOF',                   // NOTE(review): presumably the end-of-input marker — confirm against the lexer
+  // Units (for quantities)
+  UNIT = 'UNIT',                 // year, month, 'mg', etc.
+  // Trivia tokens (when preserving whitespace/comments)
+  WS = 'WS',                     // Whitespace
+  COMMENT = 'COMMENT',           // /* Multi-line comment */
+  LINE_COMMENT = 'LINE_COMMENT', // // Single-line comment
+}
+export interface Position { // Source location attached to a token (see Token.position)
+  line: number;   // line number — NOTE(review): 0- or 1-based is not visible here, confirm
+  column: number; // column within the line — base likewise unconfirmed
+  offset: number; // NOTE(review): presumably the absolute character offset into the input — confirm
+}
+export enum Channel { // Numeric channel a token may be tagged with (see Token.channel)
+  DEFAULT = 0, // NOTE(review): presumably regular, non-trivia tokens — confirm against the lexer
+  HIDDEN = 1    // For whitespace and comments
+}
+export interface Token { // A single token: its kind, text, location and optional extras
+  type: TokenType;    // kind of token
+  value: string;      // NOTE(review): presumably the token's text — confirm against the lexer
+  position: Position; // line/column/offset of the token
+  channel?: Channel;  // Optional channel for trivia
+  operation?: any;    // Operation from registry (using any to avoid circular dependency)
+  literalValue?: any; // Parsed literal value for LITERAL tokens
+}

package/src/lexer2/index.md ADDED Viewed

@@ -0,0 +1,232 @@
+# Lexer2 Overview
+## Introduction
+Lexer2 is a high-performance manual lexer for the FHIRPath expression language, designed as a drop-in replacement for the ANTLR-based lexer. It achieves ~2.2M expressions/second, representing a 49% improvement over the initial implementation through systematic optimizations.
+## Algorithm Overview
+The lexer uses a single-pass, character-by-character scanning approach with the following key components:
+### 1. Character Classification via Lookup Tables
+Instead of function calls for character classification, we use pre-computed lookup tables:
+```typescript
+// src/lexer2/index.ts:184-203
+const IS_DIGIT = new Uint8Array(256);
+const IS_LETTER = new Uint8Array(256);
+const IS_LETTER_OR_DIGIT = new Uint8Array(256);
+const IS_HEX_DIGIT = new Uint8Array(256);
+```
+These 256-byte arrays provide O(1) character classification with excellent cache locality.
+### 2. Switch-Based Token Dispatch
+The main tokenization logic uses a switch statement on the first character for efficient dispatch:
+```typescript
+// src/lexer2/index.ts:786-906
+switch (firstChar) {
+  case "'": return this.readString();
+  case '`': return this.readDelimitedIdentifier();
+  case '@': return this.readDateTime();
+  case '$': return this.readSpecialIdentifier();
+  // ... single-character operators
+  default:
+    if (IS_DIGIT[firstCharCode]) return this.readNumber();
+    if (IS_LETTER[firstCharCode]) return this.readIdentifierOrKeyword();
+}
+```
+### 3. Optimized Keyword Recognition
+Keywords are recognized using nested switches on string length first, then value:
+```typescript
+// src/lexer2/index.ts:662-721
+switch (length) {
+  case 2:
+    switch (value) {
+      case 'as': type = TokenType.AS; break;
+      case 'in': type = TokenType.IN; break;
+      // ...
+    }
+    break;
+  case 3:
+    switch (value) {
+      case 'div': type = TokenType.DIV; break;
+      case 'mod': type = TokenType.MOD; break;
+      // ...
+    }
+    break;
+  // ...
+}
+```
+This approach filters out most identifiers immediately and compiles to efficient jump tables.
+## Design Decisions
+### 1. Token Representation
+After extensive benchmarking (see [optimization-summary.md](./optimization-summary.md#token-representation-benchmarks)), we use plain object literals:
+```typescript
+// src/lexer2/index.ts:159-165
+export interface Token {
+  type: TokenType;  // numeric enum
+  start: number;    // position in input string
+  end: number;      // end position
+  line: number;     // line number for error reporting
+  column: number;   // column number for error reporting
+}
+```
+Key findings:
+- Object literals are 78% faster than classes
+- Arrays are 244% slower despite lower memory usage
+- V8 optimizes object literals with consistent shapes via hidden classes
+### 2. Numeric Enums for Token Types
+Token types use numeric enums for better performance:
+```typescript
+// src/lexer2/index.ts:1-82
+export enum TokenType {
+  NULL,      // 0
+  BOOLEAN,   // 1
+  STRING,    // 2
+  NUMBER,    // 3
+  // ...
+}
+```
+Benefits:
+- 2.5% performance improvement over string enums
+- Smaller memory footprint (4 bytes vs string length)
+- Better switch statement optimization
+Debug support is maintained via helper functions:
+- `tokenTypeToString()` - converts numeric type to string
+- `debugTokens()` - human-readable token output
+### 3. Position Tracking Strategy
+The lexer tracks both:
+- **Character positions** (`start`, `end`) - for substring extraction
+- **Line/column information** - for error reporting
+```typescript
+// src/lexer2/index.ts:241-257
+private advance(): string {
+  const char = this.input[this.position] || '';
+  this.position++;
+  if (char === '\n') {
+    this.line++;
+    this.column = 1;
+  } else {
+    this.column++;
+  }
+  return char;
+}
+```
+This dual tracking adds ~4.2% overhead but provides essential debugging information.
+### 4. Whitespace Handling Optimization
+Whitespace recognition uses a character code switch for efficiency:
+```typescript
+// src/lexer2/index.ts:268-283
+switch (charCode) {
+  case 32:  // ' ' (space)
+  case 9:   // '\t' (tab)
+    this.position++;
+    this.column++;
+    break;
+  case 13:  // '\r' (carriage return)
+    this.position++;
+    break;
+  case 10:  // '\n' (line feed)
+    this.position++;
+    this.line++;
+    this.column = 1;
+    break;
+  default:
+    // Not whitespace
+}
+```
+### 5. Inline Hot Functions
+Critical path functions are inlined to reduce call overhead:
+- Character classification uses lookup tables directly
+- Digit reading loops are fully inlined
+- No separate utility functions in hot paths
+## Performance Characteristics
+Current performance: **~2,200K expressions/second**
+### Optimization Timeline:
+1. Initial implementation: ~1,477K expr/sec
+2. Lookup tables: ~1,546K expr/sec (+4.7%)
+3. Switch-based keywords: ~2,192K expr/sec (+42%)
+4. Character code switches: ~2,240K expr/sec (+2.2%)
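+5. Numeric enums: ~2,200K expr/sec (numeric enums alone gave +2.5%; the figure is lower than step 4 because it also includes the ~4.2% overhead of line/column position tracking)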
+### Failed Optimizations:
+- **Reusable tokens**: 11% performance degradation due to method call overhead
+- **Character arrays**: Slower than string indexing in modern V8
+- **Object pooling**: V8's allocation is already highly optimized
+## Future Optimization Opportunities
+Based on profiling, remaining optimization opportunities include:
+1. **Optimize readSpecialIdentifier** (est. 2-3% improvement)
+   - Remove substring call at line 734
+   - Use direct character comparison
+2. **Optimize readDateTime/readTimeFormat** (est. 1-2% improvement)
+   - Reduce redundant charCode lookups
+   - Cache frequently accessed positions
+3. **Whitespace lookup table** (est. 0.5-1% improvement)
+   - Replace switch with lookup table
+   - May improve branch prediction
+## Usage Example
+```typescript
+import { Lexer, TokenType, tokenTypeToString } from './index';
+const lexer = new Lexer("Patient.name.where(use = 'official')");
+const tokens = lexer.tokenize();
+// Process tokens
+for (const token of tokens) {
+  const value = lexer.getTokenValue(token);
+  const type = tokenTypeToString(token.type);
+  console.log(`${type}(${value}) at ${token.line}:${token.column}`);
+}
+// Debug output
+console.log(lexer.debugTokens());
+```
+## Architecture Integration
+The lexer integrates with the FHIRPath parser by:
+1. Providing a token stream via `tokenize()`
+2. Supporting position information for error reporting
+3. Maintaining compatibility with the existing Token interface
+4. Offering configurable whitespace/comment handling
+See [parser integration](../parser/parser.ts) for usage in the parsing pipeline.

package/src/lexer2/index.perf.test.ts ADDED Viewed

@@ -0,0 +1,68 @@
+import { describe, it } from 'bun:test';
+import { Lexer } from './index';
+import * as fs from 'fs';
+import * as path from 'path';
+describe('Lexer Performance', () => { // bun:test suite that wraps the benchmark below
+  it('measures lexer performance on fixture expressions', () => {
+    runPerformanceTest(false); // run the benchmark with preserveTrivia = false
+  });
+});
+/**
+ * Benchmarks the lexer over every expression stored in the JSON fixture files.
+ *
+ * Each `*.json` file in `<cwd>/test/fixtures` must hold a JSON array; every non-empty
+ * string in it is tokenized once as a warm-up and then `iterations` more times while
+ * being timed. Aggregate results are printed to the console.
+ *
+ * @param preserveTrivia - Passed through to the `Lexer` options on every run.
+ * @throws Error when a fixture file does not contain a JSON array.
+ */
+function runPerformanceTest(preserveTrivia: boolean) {
+    const fixturesPath = path.join(process.cwd(), 'test', 'fixtures');
+    const iterations = 10000;
+    // Collect every JSON fixture in the fixtures directory
+    const fixtureFiles = fs.readdirSync(fixturesPath)
+      .filter(file => file.endsWith('.json'))
+      .map(file => ({
+        name: file,
+        path: path.join(fixturesPath, file)
+      }));
+    console.log(`\nRunning lexer performance tests with ${iterations} iterations per expression`);
+    console.log(`Trivia preservation: ${preserveTrivia ? 'ENABLED' : 'DISABLED'}\n`);
+    let totalExpressions = 0;
+    let totalIterations = 0;
+    let totalTime = 0;
+    for (const fixture of fixtureFiles) {
+      console.log(`Processing ${fixture.name}...`);
+      const content = fs.readFileSync(fixture.path, 'utf-8');
+      // JSON.parse returns untyped data: check the shape instead of trusting a cast
+      const parsed: unknown = JSON.parse(content);
+      if (!Array.isArray(parsed)) {
+        throw new Error(`Fixture ${fixture.name} must contain a JSON array of expressions`);
+      }
+      for (const expression of parsed) {
+        // Only non-empty strings are valid lexer input
+        if (typeof expression !== 'string' || expression.length === 0) continue;
+        // Warm-up run so the timed loop does not include first-call costs
+        const warmupLexer = new Lexer(expression, { preserveTrivia });
+        warmupLexer.tokenize();
+        // Time all iterations for this expression as one span
+        const start = performance.now();
+        for (let j = 0; j < iterations; j++) {
+          const lexer = new Lexer(expression, { preserveTrivia });
+          lexer.tokenize();
+        }
+        const end = performance.now();
+        totalTime += (end - start);
+        totalExpressions++;
+        totalIterations += iterations;
+      }
+    }
+    // Without any measured run the averages below would be 0/0 and print NaN
+    if (totalIterations === 0) {
+      console.log('\nNo expressions found in fixtures; nothing to measure.');
+      return;
+    }
+    // Average over every timed tokenize() call, in milliseconds
+    const avgTimePerExpression = totalTime / totalIterations;
+    console.log('\n' + '='.repeat(50));
+    console.log('RESULTS');
+    console.log('='.repeat(50));
+    console.log(`Total expressions: ${totalExpressions}`);
+    console.log(`Total iterations: ${totalIterations}`);
+    console.log(`Total time: ${(totalTime / 1000).toFixed(2)}s`);
+    console.log(`Time per expression: ${avgTimePerExpression.toFixed(4)}ms`);
+    console.log(`Expressions per second: ${(1000 / avgTimePerExpression).toFixed(0)}`);
+}