@generaltranslation/python-extractor 0.1.6 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ import fs from 'node:fs';
2
+ import { getParser } from './parser.js';
1
3
  import { PYTHON_DERIVE, PYTHON_DECLARE_STATIC, PYTHON_DECLARE_VAR, } from './constants.js';
2
4
  import { resolveFunctionInCurrentFile, resolveFunctionInFile, } from './resolveFunctionVariants.js';
3
5
  import { extractImports } from './extractImports.js';
@@ -240,6 +242,22 @@ async function resolveStaticValue(node, ctx) {
240
242
  }
241
243
  return resolveFunctionCall(node, ctx);
242
244
  }
245
+ // Identifier: resolve to its assigned value
246
+ if (node.type === 'identifier') {
247
+ const result = await resolveIdentifier(node, ctx);
248
+ if (result)
249
+ return result;
250
+ ctx.errors.push(`${locationStr(node)}: could not resolve identifier "${node.text}" to a static value`);
251
+ return null;
252
+ }
253
+ // Subscript: dictionary access like LABELS[score] — returns all values as choices
254
+ if (node.type === 'subscript') {
255
+ return resolveSubscript(node, ctx);
256
+ }
257
+ // Attribute: dictionary access like obj.attr — returns the specific value
258
+ if (node.type === 'attribute') {
259
+ return resolveAttribute(node, ctx);
260
+ }
243
261
  ctx.errors.push(`${locationStr(node)}: unsupported derive() argument type "${node.type}"`);
244
262
  return null;
245
263
  }
@@ -438,6 +456,481 @@ async function resolveDeclareVarArg(callNode, ctx) {
438
456
  const icuText = declareVar('', options);
439
457
  return { type: 'text', text: icuText };
440
458
  }
459
+ // ===== Constant / Dictionary Resolution ===== //
/**
 * Looks up an assignment `name = <value>` among the direct children of
 * `rootNode`.
 *
 * Only `expression_statement` children whose first child is an `assignment`
 * with a plain identifier on the left are considered. The first match in
 * child order wins; later re-assignments of the same name are not examined.
 *
 * @param {string} name - Identifier expected on the left-hand side.
 * @param {object} rootNode - Syntax node whose direct children are scanned.
 * @returns {object|null} The right-hand side node, or null when nothing matches.
 */
function findConstantAssignment(name, rootNode) {
  let position = 0;
  while (position < rootNode.childCount) {
    const statement = rootNode.child(position);
    position += 1;
    if (statement?.type !== 'expression_statement') {
      continue;
    }
    const assignment = statement.child(0);
    if (assignment?.type !== 'assignment') {
      continue;
    }
    const target = assignment.childForFieldName('left');
    const value = assignment.childForFieldName('right');
    const targetMatches = target?.type === 'identifier' && target.text === name;
    if (targetMatches && value) {
      return value;
    }
  }
  return null;
}
/**
 * Keys (`<filePath>::<name>`) of identifiers whose resolution is in progress.
 * A key that is already present means the lookup has looped back on itself
 * (e.g. `x = y; y = x`), so the nested lookup is abandoned.
 */
const resolvingIdentifiers = new Set();
/**
 * Resolves an identifier node to a static value.
 *
 * The assignment is searched for in the current file first; when none is
 * found, the name is looked up through the file's imports and the assignment
 * is searched for in the imported file, which is read from disk and parsed.
 *
 * @param {object} node - Identifier syntax node; `node.text` is the name.
 * @param {object} ctx - Resolution context (`rootNode`, `imports`, `filePath`, `errors`).
 * @returns {Promise<object|null>} The resolved value, or null when the name is
 *   part of a cycle, has no assignment, or the imported file cannot be read or parsed.
 */
async function resolveIdentifier(node, ctx) {
  const identifierName = node.text;
  const cycleKey = `${ctx.filePath}::${identifierName}`;
  if (resolvingIdentifiers.has(cycleKey)) {
    // Already resolving this name further up the call chain.
    return null;
  }
  resolvingIdentifiers.add(cycleKey);
  try {
    // A definition in the current file takes precedence over imports.
    const sameFileValue = findConstantAssignment(identifierName, ctx.rootNode);
    if (sameFileValue) {
      return await resolveStaticValue(sameFileValue, ctx);
    }
    const imported = findImportForName(identifierName, ctx);
    if (!imported) {
      return null;
    }
    let fileText;
    try {
      fileText = fs.readFileSync(imported.filePath, 'utf8');
    } catch {
      // Unreadable import target: treated as "not resolvable".
      return null;
    }
    const pythonParser = await getParser();
    const parsed = pythonParser.parse(fileText);
    if (!parsed) {
      return null;
    }
    const importedValue = findConstantAssignment(
      imported.originalName,
      parsed.rootNode
    );
    if (!importedValue) {
      return null;
    }
    // Values from the imported file are resolved against that file's own
    // root and imports; errors keep accumulating in the caller's list.
    const importedCtx = {
      rootNode: parsed.rootNode,
      imports: extractImportsFromRoot(parsed.rootNode, ctx.imports),
      filePath: imported.filePath,
      errors: ctx.errors,
    };
    return await resolveStaticValue(importedValue, importedCtx);
  } finally {
    resolvingIdentifiers.delete(cycleKey);
  }
}
/**
 * Locates the dictionary or list literal assigned to `name`.
 *
 * The current file is checked first. If it has no such assignment, or the
 * assigned value is neither a dictionary nor a list, the name is looked up
 * through the imports and the imported file is read and parsed.
 *
 * @param {string} name - Name of the collection to find.
 * @param {object} ctx - Resolution context (`rootNode`, `imports`, `filePath`, `errors`).
 * @returns {Promise<{dictNode: object, valueCtx: object}|null>} The literal node
 *   together with the context to use when resolving values inside it, or null.
 */
async function findDictionaryAssignment(name, ctx) {
  const isCollection = (candidate) =>
    candidate.type === 'dictionary' || candidate.type === 'list';

  const sameFileValue = findConstantAssignment(name, ctx.rootNode);
  if (sameFileValue && isCollection(sameFileValue)) {
    return { dictNode: sameFileValue, valueCtx: ctx };
  }

  const imported = findImportForName(name, ctx);
  if (!imported) {
    return null;
  }
  let fileText;
  try {
    fileText = fs.readFileSync(imported.filePath, 'utf8');
  } catch {
    // Unreadable import target: nothing to return.
    return null;
  }
  const pythonParser = await getParser();
  const parsed = pythonParser.parse(fileText);
  if (!parsed) {
    return null;
  }
  const importedValue = findConstantAssignment(
    imported.originalName,
    parsed.rootNode
  );
  if (!importedValue || !isCollection(importedValue)) {
    return null;
  }
  const importedImports = extractImportsFromRoot(parsed.rootNode, ctx.imports);
  const valueCtx = {
    rootNode: parsed.rootNode,
    imports: importedImports,
    filePath: imported.filePath,
    errors: ctx.errors,
  };
  return { dictNode: importedValue, valueCtx };
}
/**
 * Collects the key/value entries of a dictionary node, expanding `**name`
 * spreads whose source is a dictionary assigned in this file or in an
 * imported file.
 *
 * Keys come from non-f-string string literals (via `extractStringContent`),
 * identifiers and integers (their source text); any other key form yields
 * `key: null`. Entries are returned in child order, with spread entries
 * placed where the spread appears. Overridden keys are NOT de-duplicated.
 *
 * A spread that points back at a dictionary which is already being expanded
 * (e.g. `from base import LABELS` followed by `LABELS = {**LABELS, ...}`)
 * is not expanded again: the local match is skipped and the lookup falls
 * through to the imported definition, instead of recursing without bound.
 *
 * @param {object} dictNode - Syntax node of the dictionary literal.
 * @param {object} ctx - Resolution context (`rootNode`, `imports`, `filePath`, `errors`).
 * @param {Set<string>} [activeDicts] - Internal bookkeeping: keys of the
 *   dictionaries currently being expanded. Callers normally omit it.
 * @returns {Promise<Array<{key: string|null, valueNode: object}>>}
 */
async function collectDictEntries(dictNode, ctx, activeDicts = new Set()) {
  // Syntax-node wrappers are not guaranteed to be identity-comparable, so a
  // dictionary is identified by its file and source span.
  const expansionKey = (candidate, candidateCtx) =>
    `${candidateCtx.filePath}::${candidate.startIndex}:${candidate.endIndex}`;

  const selfKey = expansionKey(dictNode, ctx);
  if (activeDicts.has(selfKey)) {
    // Safety net for cycles that pass through other files.
    return [];
  }
  activeDicts.add(selfKey);
  const entries = [];
  try {
    for (let i = 0; i < dictNode.childCount; i++) {
      const child = dictNode.child(i);
      if (!child) {
        continue;
      }
      if (child.type === 'pair') {
        const keyNode = child.childForFieldName('key');
        const valueNode = child.childForFieldName('value');
        if (!valueNode) {
          continue;
        }
        let key = null;
        if (keyNode) {
          if (keyNode.type === 'string' && !isFString(keyNode)) {
            key = extractStringContent(keyNode) ?? null;
          } else if (keyNode.type === 'identifier' || keyNode.type === 'integer') {
            key = keyNode.text;
          }
        }
        entries.push({ key, valueNode });
        continue;
      }
      if (child.type !== 'dictionary_splat') {
        continue;
      }
      // The spread source is the first child that is not the `**` token.
      let splatExpr = null;
      for (let j = 0; j < child.childCount; j++) {
        const splatChild = child.child(j);
        if (splatChild && splatChild.type !== '**') {
          splatExpr = splatChild;
          break;
        }
      }
      if (!splatExpr || splatExpr.type !== 'identifier') {
        continue;
      }
      const name = splatExpr.text;
      // Local definition first, unless it is one we are already inside of.
      const localDict = findConstantAssignment(name, ctx.rootNode);
      if (
        localDict &&
        localDict.type === 'dictionary' &&
        !activeDicts.has(expansionKey(localDict, ctx))
      ) {
        entries.push(...(await collectDictEntries(localDict, ctx, activeDicts)));
        continue;
      }
      // Otherwise try the imported definition.
      const importInfo = findImportForName(name, ctx);
      if (!importInfo) {
        continue;
      }
      let source;
      try {
        source = fs.readFileSync(importInfo.filePath, 'utf8');
      } catch {
        // Unreadable import target: this spread contributes nothing.
        continue;
      }
      const parser = await getParser();
      const tree = parser.parse(source);
      if (!tree) {
        continue;
      }
      const externalValue = findConstantAssignment(
        importInfo.originalName,
        tree.rootNode
      );
      if (externalValue && externalValue.type === 'dictionary') {
        const externalCtx = {
          rootNode: tree.rootNode,
          imports: extractImportsFromRoot(tree.rootNode, ctx.imports),
          filePath: importInfo.filePath,
          errors: ctx.errors,
        };
        entries.push(
          ...(await collectDictEntries(externalValue, externalCtx, activeDicts))
        );
      }
    }
  } finally {
    activeDicts.delete(selfKey);
  }
  return entries;
}
/**
 * Collects the elements of a list node as entries keyed by their position
 * (`"0"`, `"1"`, ...), expanding `*name` spreads whose source is a list
 * assigned in this file or in an imported file. Spread elements are
 * re-numbered so that keys stay consecutive.
 *
 * Bracket and comma tokens are skipped, and so are `comment` children:
 * a comment placed between elements must not consume an index or be handed
 * on as a value.
 *
 * A spread that points back at a list which is already being expanded
 * (e.g. `from base import ITEMS` followed by `ITEMS = [*ITEMS, ...]`) is not
 * expanded again: the local match is skipped and the lookup falls through to
 * the imported definition, instead of recursing without bound.
 *
 * @param {object} listNode - Syntax node of the list literal.
 * @param {object} ctx - Resolution context (`rootNode`, `imports`, `filePath`, `errors`).
 * @param {Set<string>} [activeLists] - Internal bookkeeping: keys of the lists
 *   currently being expanded. Callers normally omit it.
 * @returns {Promise<Array<{key: string, valueNode: object}>>}
 */
async function collectListEntries(listNode, ctx, activeLists = new Set()) {
  // Syntax-node wrappers are not guaranteed to be identity-comparable, so a
  // list is identified by its file and source span.
  const expansionKey = (candidate, candidateCtx) =>
    `${candidateCtx.filePath}::${candidate.startIndex}:${candidate.endIndex}`;
  const SKIPPED_TYPES = ['[', ']', ',', 'comment'];

  const selfKey = expansionKey(listNode, ctx);
  if (activeLists.has(selfKey)) {
    // Safety net for cycles that pass through other files.
    return [];
  }
  activeLists.add(selfKey);
  const entries = [];
  let index = 0;
  const appendRenumbered = (spreadEntries) => {
    for (const spreadEntry of spreadEntries) {
      entries.push({ key: String(index++), valueNode: spreadEntry.valueNode });
    }
  };
  try {
    for (let i = 0; i < listNode.childCount; i++) {
      const child = listNode.child(i);
      if (!child || SKIPPED_TYPES.includes(child.type)) {
        continue;
      }
      if (child.type !== 'list_splat') {
        // Regular element: any expression.
        entries.push({ key: String(index), valueNode: child });
        index++;
        continue;
      }
      // The spread source is the first child that is not the `*` token.
      let splatExpr = null;
      for (let j = 0; j < child.childCount; j++) {
        const splatChild = child.child(j);
        if (splatChild && splatChild.type !== '*') {
          splatExpr = splatChild;
          break;
        }
      }
      if (!splatExpr || splatExpr.type !== 'identifier') {
        continue;
      }
      // Local definition first, unless it is one we are already inside of.
      const localList = findConstantAssignment(splatExpr.text, ctx.rootNode);
      if (
        localList &&
        localList.type === 'list' &&
        !activeLists.has(expansionKey(localList, ctx))
      ) {
        appendRenumbered(await collectListEntries(localList, ctx, activeLists));
        continue;
      }
      // Otherwise try the imported definition.
      const importInfo = findImportForName(splatExpr.text, ctx);
      if (!importInfo) {
        continue;
      }
      let source;
      try {
        source = fs.readFileSync(importInfo.filePath, 'utf8');
      } catch {
        // Unreadable import target: this spread contributes nothing.
        continue;
      }
      const parser = await getParser();
      const tree = parser.parse(source);
      if (!tree) {
        continue;
      }
      const externalValue = findConstantAssignment(
        importInfo.originalName,
        tree.rootNode
      );
      if (externalValue && externalValue.type === 'list') {
        const externalCtx = {
          rootNode: tree.rootNode,
          imports: extractImportsFromRoot(tree.rootNode, ctx.imports),
          filePath: importInfo.filePath,
          errors: ctx.errors,
        };
        appendRenumbered(
          await collectListEntries(externalValue, externalCtx, activeLists)
        );
      }
    }
  } finally {
    activeLists.delete(selfKey);
  }
  return entries;
}
/**
 * Resolves an expression to the dictionary/list literal node(s) it may
 * denote, each paired with the context to use for values inside it.
 *
 * - identifier: the collection assigned to that name, if any.
 * - subscript: the base is resolved first; then, for a literal string or
 *   integer key, only entries with that key are kept, while for any other
 *   key every entry is kept. Only entries whose value is itself a
 *   dictionary or list literal are returned.
 * - attribute: like a subscript with the attribute name as the key; parents
 *   are always read as dictionaries.
 * - anything else: no result.
 *
 * @param {object} node - Expression syntax node.
 * @param {object} ctx - Resolution context (`rootNode`, `imports`, `filePath`, `errors`).
 * @returns {Promise<Array<{dictNode: object, valueCtx: object}>>} Possibly empty.
 */
async function resolveToDictNodes(node, ctx) {
  const isCollection = (candidate) =>
    candidate.type === 'dictionary' || candidate.type === 'list';

  switch (node.type) {
    case 'identifier': {
      const found = await findDictionaryAssignment(node.text, ctx);
      return found ? [found] : [];
    }
    case 'subscript': {
      const baseNode = node.childForFieldName('value');
      if (!baseNode) {
        return [];
      }
      const parents = await resolveToDictNodes(baseNode, ctx);
      if (parents.length === 0) {
        return [];
      }
      const keyNode = node.childForFieldName('subscript');
      if (!keyNode) {
        return [];
      }
      // A key is "static" when it is a plain string literal or an integer.
      const stringKey =
        keyNode.type === 'string' && !isFString(keyNode)
          ? extractStringContent(keyNode)
          : null;
      const intKey = keyNode.type === 'integer' ? keyNode.text : null;
      const hasStaticKey = stringKey != null || intKey != null;
      const wantedKey = stringKey ?? intKey;

      const nested = [];
      for (const parent of parents) {
        const parentEntries =
          parent.dictNode.type === 'list'
            ? await collectListEntries(parent.dictNode, parent.valueCtx)
            : await collectDictEntries(parent.dictNode, parent.valueCtx);
        for (const { key, valueNode } of parentEntries) {
          // Static key narrows to matching entries; dynamic key keeps all.
          if (hasStaticKey && key !== wantedKey) {
            continue;
          }
          if (isCollection(valueNode)) {
            nested.push({ dictNode: valueNode, valueCtx: parent.valueCtx });
          }
        }
      }
      return nested;
    }
    case 'attribute': {
      const ownerNode = node.childForFieldName('object');
      const memberNode = node.childForFieldName('attribute');
      if (!ownerNode || !memberNode) {
        return [];
      }
      const parents = await resolveToDictNodes(ownerNode, ctx);
      if (parents.length === 0) {
        return [];
      }
      const memberName = memberNode.text;
      const nested = [];
      for (const parent of parents) {
        const parentEntries = await collectDictEntries(
          parent.dictNode,
          parent.valueCtx
        );
        for (const { key, valueNode } of parentEntries) {
          if (key === memberName && isCollection(valueNode)) {
            nested.push({ dictNode: valueNode, valueCtx: parent.valueCtx });
          }
        }
      }
      return nested;
    }
    default:
      return [];
  }
}
/**
 * Resolves a subscript expression such as `LABELS[score]` or `D["a"]["x"]`.
 *
 * The subscripted expression is resolved to its candidate dictionary/list
 * literals. With a literal string or integer key, every entry carrying that
 * key is resolved (all matches are kept, so an entry overridden after a
 * spread still contributes). With any other key, every entry is resolved.
 * Nested `choice` results are flattened into the branch list.
 *
 * @param {object} node - Subscript syntax node.
 * @param {object} ctx - Resolution context; failures are appended to `ctx.errors`.
 * @returns {Promise<object|null>} The single resolved value, a
 *   `{ type: 'choice', nodes }` when there are several, or null on failure.
 */
async function resolveSubscript(node, ctx) {
  const containerExpr = node.childForFieldName('value');
  if (!containerExpr) {
    ctx.errors.push(`${locationStr(node)}: subscript missing value`);
    return null;
  }
  const containers = await resolveToDictNodes(containerExpr, ctx);
  if (containers.length === 0) {
    ctx.errors.push(`${locationStr(node)}: could not find dictionary or list for "${containerExpr.text}"`);
    return null;
  }

  const keyNode = node.childForFieldName('subscript');
  let stringKey = null;
  if (keyNode?.type === 'string' && !isFString(keyNode)) {
    stringKey = extractStringContent(keyNode);
  }
  const intKey = keyNode?.type === 'integer' ? keyNode.text : null;
  // Null/undefined here means the key is dynamic and every entry qualifies.
  const wantedKey = stringKey ?? intKey;

  const branches = [];
  for (const container of containers) {
    const entries =
      container.dictNode.type === 'list'
        ? await collectListEntries(container.dictNode, container.valueCtx)
        : await collectDictEntries(container.dictNode, container.valueCtx);
    for (const entry of entries) {
      if (wantedKey != null && entry.key !== wantedKey) {
        continue;
      }
      const resolved = await resolveStaticValue(entry.valueNode, container.valueCtx);
      if (!resolved) {
        continue;
      }
      if (resolved.type === 'choice') {
        branches.push(...resolved.nodes);
      } else {
        branches.push(resolved);
      }
    }
  }

  if (branches.length === 0) {
    ctx.errors.push(`${locationStr(node)}: collection has no resolvable values`);
    return null;
  }
  return branches.length === 1 ? branches[0] : { type: 'choice', nodes: branches };
}
/**
 * Resolves an attribute access such as `obj.attr` or `obj.a.b`.
 *
 * The object expression is resolved to its candidate dictionary literals,
 * and every entry whose key equals the attribute name is resolved to a
 * static value. Nested `choice` results are flattened into the branch list.
 *
 * @param {object} node - Attribute syntax node.
 * @param {object} ctx - Resolution context; failures are appended to `ctx.errors`.
 * @returns {Promise<object|null>} The single resolved value, a
 *   `{ type: 'choice', nodes }` when there are several, or null on failure.
 */
async function resolveAttribute(node, ctx) {
  const ownerNode = node.childForFieldName('object');
  const memberNode = node.childForFieldName('attribute');
  if (!(ownerNode && memberNode)) {
    ctx.errors.push(`${locationStr(node)}: attribute access missing object or attribute`);
    return null;
  }
  const attrName = memberNode.text;

  const containers = await resolveToDictNodes(ownerNode, ctx);
  if (containers.length === 0) {
    ctx.errors.push(`${locationStr(node)}: could not find dictionary or list for "${ownerNode.text}"`);
    return null;
  }

  const branches = [];
  for (const container of containers) {
    const entries = await collectDictEntries(container.dictNode, container.valueCtx);
    for (const { key, valueNode } of entries) {
      if (key !== attrName) {
        continue;
      }
      const resolved = await resolveStaticValue(valueNode, container.valueCtx);
      if (!resolved) {
        continue;
      }
      if (resolved.type === 'choice') {
        branches.push(...resolved.nodes);
      } else {
        branches.push(resolved);
      }
    }
  }

  if (branches.length === 0) {
    ctx.errors.push(`${locationStr(node)}: could not find key "${attrName}" in dictionary or list`);
    return null;
  }
  return branches.length === 1 ? branches[0] : { type: 'choice', nodes: branches };
}
441
934
  // ===== Helpers ===== //
442
935
  function getFirstPositionalArg(callNode) {
443
936
  const argsNode = callNode.childForFieldName('arguments');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@generaltranslation/python-extractor",
3
- "version": "0.1.6",
3
+ "version": "0.2.1",
4
4
  "description": "Python source code extraction for General Translation",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -47,7 +47,7 @@
47
47
  "dependencies": {
48
48
  "tree-sitter-python": "^0.25.0",
49
49
  "web-tree-sitter": "^0.26.6",
50
- "generaltranslation": "8.1.20"
50
+ "generaltranslation": "8.1.21"
51
51
  },
52
52
  "scripts": {
53
53
  "build": "tsc",