nmhit 0.2.1__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {nmhit-0.2.1 → nmhit-0.3.0}/.github/workflows/release.yml +4 -2
  2. {nmhit-0.2.1 → nmhit-0.3.0}/CMakeLists.txt +1 -1
  3. {nmhit-0.2.1 → nmhit-0.3.0}/PKG-INFO +1 -1
  4. {nmhit-0.2.1 → nmhit-0.3.0}/generated/Lexer.cpp +4 -4
  5. {nmhit-0.2.1 → nmhit-0.3.0}/generated/Parser.h +4 -4
  6. {nmhit-0.2.1 → nmhit-0.3.0}/generated/location.hh +4 -4
  7. {nmhit-0.2.1 → nmhit-0.3.0}/include/nmhit/Node.h +9 -1
  8. {nmhit-0.2.1 → nmhit-0.3.0}/pyproject.toml +2 -2
  9. {nmhit-0.2.1 → nmhit-0.3.0}/python/tests/test_nmhit.py +51 -0
  10. {nmhit-0.2.1 → nmhit-0.3.0}/src/Lexer.l +4 -4
  11. {nmhit-0.2.1 → nmhit-0.3.0}/src/Node.cpp +19 -4
  12. {nmhit-0.2.1 → nmhit-0.3.0}/src/ParseDriver.h +7 -1
  13. {nmhit-0.2.1 → nmhit-0.3.0}/tests/test_hit.cpp +47 -0
  14. {nmhit-0.2.1 → nmhit-0.3.0}/.clang-format +0 -0
  15. {nmhit-0.2.1 → nmhit-0.3.0}/.github/workflows/ci.yml +0 -0
  16. {nmhit-0.2.1 → nmhit-0.3.0}/.gitignore +0 -0
  17. {nmhit-0.2.1 → nmhit-0.3.0}/.pre-commit-config.yaml +0 -0
  18. {nmhit-0.2.1 → nmhit-0.3.0}/.pre-commit-hooks.yaml +0 -0
  19. {nmhit-0.2.1 → nmhit-0.3.0}/CONTRIBUTING.md +0 -0
  20. {nmhit-0.2.1 → nmhit-0.3.0}/README.md +0 -0
  21. {nmhit-0.2.1 → nmhit-0.3.0}/cmake/nmhit.pc.in +0 -0
  22. {nmhit-0.2.1 → nmhit-0.3.0}/cmake/nmhitConfig.cmake.in +0 -0
  23. {nmhit-0.2.1 → nmhit-0.3.0}/generated/Lexer.h +0 -0
  24. {nmhit-0.2.1 → nmhit-0.3.0}/generated/Parser.cpp +0 -0
  25. {nmhit-0.2.1 → nmhit-0.3.0}/include/nmhit/BraceExpr.h +0 -0
  26. {nmhit-0.2.1 → nmhit-0.3.0}/include/nmhit/TypeRegistry.h +0 -0
  27. {nmhit-0.2.1 → nmhit-0.3.0}/include/nmhit/nmhit.h +0 -0
  28. {nmhit-0.2.1 → nmhit-0.3.0}/python/nmhit/__init__.py +0 -0
  29. {nmhit-0.2.1 → nmhit-0.3.0}/python/nmhit/_cli.py +0 -0
  30. {nmhit-0.2.1 → nmhit-0.3.0}/python/nmhit/py.typed +0 -0
  31. {nmhit-0.2.1 → nmhit-0.3.0}/python/src/_nmhit.cpp +0 -0
  32. {nmhit-0.2.1 → nmhit-0.3.0}/src/BraceExpr.cpp +0 -0
  33. {nmhit-0.2.1 → nmhit-0.3.0}/src/Parser.y +0 -0
  34. {nmhit-0.2.1 → nmhit-0.3.0}/tests/CMakeLists.txt +0 -0
@@ -19,12 +19,12 @@ jobs:
19
19
  - uses: actions/checkout@v4
20
20
 
21
21
  - name: Build wheels
22
- uses: pypa/cibuildwheel@v2.23
22
+ uses: pypa/cibuildwheel@v3.4.1
23
23
  env:
24
24
  CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-* cp313-* cp314-*"
25
25
  CIBW_SKIP: "*-musllinux* *-manylinux_i686"
26
26
  CIBW_ARCHS_LINUX: "x86_64"
27
- CIBW_ARCHS_MACOS: "x86_64"
27
+ CIBW_ARCHS_MACOS: "x86_64 arm64"
28
28
  CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
29
29
  CIBW_TEST_REQUIRES: "pytest"
30
30
  CIBW_TEST_COMMAND: "pytest {project}/python/tests/ -v"
@@ -74,6 +74,8 @@ jobs:
74
74
 
75
75
  - name: Publish
76
76
  uses: pypa/gh-action-pypi-publish@release/v1
77
+ with:
78
+ skip-existing: true
77
79
  # No API token needed — configure Trusted Publishing once on pypi.org:
78
80
  # Publisher: GitHub Actions, repo: <owner>/nmhit, workflow: release.yml,
79
81
  # environment: pypi
@@ -1,6 +1,6 @@
1
1
  cmake_minimum_required(VERSION 3.20)
2
2
  # Keep this version in sync with [project] version in pyproject.toml.
3
- project(neml2-hit VERSION 0.2.1 LANGUAGES CXX)
3
+ project(neml2-hit VERSION 0.3.0 LANGUAGES CXX)
4
4
 
5
5
  set(CMAKE_CXX_STANDARD 17)
6
6
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nmhit
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: Python bindings for the nmhit NEML2-flavored HIT parser
5
5
  License: MIT
6
6
  Classifier: Programming Language :: Python :: 3
@@ -1004,8 +1004,8 @@ typedef nmhit_detail::ParseDriver HIT_Driver;
1004
1004
  return nmhit_detail::Parser::token::t; } while(0)
1005
1005
  #define TOK_UNQUOTED do { DRIVER->flush_unquoted(); \
1006
1006
  return nmhit_detail::Parser::token::TOK_UNQUOTED_STR; } while(0)
1007
- #define TOK_TRIPLE do { \
1008
- DRIVER->set_verbatim_pending(DRIVER->_triple_str.c_str(), (int)DRIVER->_triple_str.size()); \
1007
+ #define TOK_TRIPLE(delim) do { \
1008
+ DRIVER->set_verbatim_pending(DRIVER->_triple_str.c_str(), (int)DRIVER->_triple_str.size(), delim); \
1009
1009
  return nmhit_detail::Parser::token::TOK_UNQUOTED_STR; \
1010
1010
  } while(0)
1011
1011
  #define YY_NO_INPUT 1
@@ -1581,7 +1581,7 @@ YY_RULE_SETUP
1581
1581
  YY_BREAK
1582
1582
  case 32:
1583
1583
  YY_RULE_SETUP
1584
- { BEGIN(SECTION_BODY); TOK_TRIPLE; }
1584
+ { BEGIN(SECTION_BODY); TOK_TRIPLE('\''); }
1585
1585
  YY_BREAK
1586
1586
  case 33:
1587
1587
  YY_RULE_SETUP
@@ -1598,7 +1598,7 @@ YY_RULE_SETUP
1598
1598
  YY_BREAK
1599
1599
  case 36:
1600
1600
  YY_RULE_SETUP
1601
- { BEGIN(SECTION_BODY); TOK_TRIPLE; }
1601
+ { BEGIN(SECTION_BODY); TOK_TRIPLE('"'); }
1602
1602
  YY_BREAK
1603
1603
  case 37:
1604
1604
  YY_RULE_SETUP
@@ -32,7 +32,7 @@
32
32
 
33
33
 
34
34
  /**
35
- ** \file _build/Parser.h
35
+ ** \file /home/thu/projects/neml2-hit/build/Parser.h
36
36
  ** Define the nmhit_detail::parser class.
37
37
  */
38
38
 
@@ -42,8 +42,8 @@
42
42
  // especially those whose name start with YY_ or yy_. They are
43
43
  // private implementation details that can be changed or removed.
44
44
 
45
- #ifndef YY_YY_BUILD_PARSER_H_INCLUDED
46
- # define YY_YY_BUILD_PARSER_H_INCLUDED
45
+ #ifndef YY_YY_HOME_THU_PROJECTS_NEML2_HIT_BUILD_PARSER_H_INCLUDED
46
+ # define YY_YY_HOME_THU_PROJECTS_NEML2_HIT_BUILD_PARSER_H_INCLUDED
47
47
  // "%code requires" blocks.
48
48
 
49
49
  #include <memory>
@@ -1515,4 +1515,4 @@ switch (yykind)
1515
1515
 
1516
1516
 
1517
1517
 
1518
- #endif // !YY_YY_BUILD_PARSER_H_INCLUDED
1518
+ #endif // !YY_YY_HOME_THU_PROJECTS_NEML2_HIT_BUILD_PARSER_H_INCLUDED
@@ -31,12 +31,12 @@
31
31
  // version 2.2 of Bison.
32
32
 
33
33
  /**
34
- ** \file _build/location.hh
34
+ ** \file /home/thu/projects/neml2-hit/build/location.hh
35
35
  ** Define the nmhit_detail::location class.
36
36
  */
37
37
 
38
- #ifndef YY_YY_BUILD_LOCATION_HH_INCLUDED
39
- # define YY_YY_BUILD_LOCATION_HH_INCLUDED
38
+ #ifndef YY_YY_HOME_THU_PROJECTS_NEML2_HIT_BUILD_LOCATION_HH_INCLUDED
39
+ # define YY_YY_HOME_THU_PROJECTS_NEML2_HIT_BUILD_LOCATION_HH_INCLUDED
40
40
 
41
41
  # include <iostream>
42
42
  # include <string>
@@ -299,4 +299,4 @@ namespace nmhit_detail {
299
299
 
300
300
  } // nmhit_detail
301
301
 
302
- #endif // !YY_YY_BUILD_LOCATION_HH_INCLUDED
302
+ #endif // !YY_YY_HOME_THU_PROJECTS_NEML2_HIT_BUILD_LOCATION_HH_INCLUDED
@@ -266,7 +266,10 @@ private:
266
266
  class Field : public Node
267
267
  {
268
268
  public:
269
- Field(const std::string & name, const std::string & raw_value, bool verbatim = false);
269
+ Field(const std::string & name,
270
+ const std::string & raw_value,
271
+ bool verbatim = false,
272
+ char verbatim_delim = '\'');
270
273
 
271
274
  NodeType type() const override { return NodeType::Field; }
272
275
  std::string path() const override { return _name; }
@@ -283,10 +286,15 @@ public:
283
286
  /// Such fields can only be read via param<std::string>() / param_str().
284
287
  bool is_verbatim() const { return _verbatim; }
285
288
 
289
+ /// Which delimiter the verbatim body was wrapped in originally: '\'' or '"'.
290
+ /// Meaningful only when is_verbatim() is true.
291
+ char verbatim_delim() const { return _verbatim_delim; }
292
+
286
293
  private:
287
294
  std::string _name;
288
295
  std::string _raw;
289
296
  bool _verbatim = false;
297
+ char _verbatim_delim = '\'';
290
298
  };
291
299
 
292
300
  /// A comment, e.g. # some text.
@@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"
4
4
 
5
5
  [project]
6
6
  name = "nmhit"
7
- version = "0.2.1" # Keep in sync with VERSION in CMakeLists.txt.
7
+ version = "0.3.0" # Keep in sync with VERSION in CMakeLists.txt.
8
8
  description = "Python bindings for the nmhit NEML2-flavored HIT parser"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -42,5 +42,5 @@ archs = ["x86_64"]
42
42
  manylinux-x86_64-image = "manylinux_2_28"
43
43
 
44
44
  [tool.cibuildwheel.macos]
45
- archs = ["x86_64"]
45
+ archs = ["x86_64", "arm64"]
46
46
  environment = {MACOSX_DEPLOYMENT_TARGET = "10.15"} # std::filesystem requires 10.15+
@@ -625,3 +625,54 @@ def test_triple_quote_not_readable_as_list():
625
625
  assert root.param_str("k") == "1 2 3"
626
626
  with pytest.raises(nmhit.Error, match="verbatim"):
627
627
  root.param_list_int("k")
628
+
629
+
630
+ # ── triple-quoted round-trip rendering ────────────────────────────────────────
631
+ # Round-trip = parse_text(s).render() == s, byte-for-byte. The formatter
632
+ # (nmhit-format) walks this exact pipeline, so any divergence here is a
633
+ # direct formatter-breaking bug.
634
+
635
+
636
+ def test_triple_single_quote_roundtrip_inline():
637
+ src = "k = '''hello world'''\n"
638
+ assert nmhit.parse_text(src).render() == src
639
+
640
+
641
+ def test_triple_double_quote_roundtrip_inline():
642
+ src = 'k = """hello world"""\n'
643
+ assert nmhit.parse_text(src).render() == src
644
+
645
+
646
+ def test_triple_single_quote_roundtrip_multiline():
647
+ src = "k = '''\n line1\n line2\n'''\n"
648
+ assert nmhit.parse_text(src).render() == src
649
+
650
+
651
+ def test_triple_double_quote_roundtrip_multiline():
652
+ src = 'k = """\n line1\n line2\n"""\n'
653
+ assert nmhit.parse_text(src).render() == src
654
+
655
+
656
+ def test_triple_quote_roundtrip_python_expression():
657
+ src = (
658
+ "[Tensors]\n"
659
+ " [strain]\n"
660
+ " type = Python\n"
661
+ " expr = '''\n"
662
+ "torch.stack([\n"
663
+ " torch.linspace(0, 1, 5),\n"
664
+ " torch.linspace(1, 2, 5),\n"
665
+ "])\n"
666
+ "'''\n"
667
+ " []\n"
668
+ "[]\n"
669
+ )
670
+ assert nmhit.parse_text(src).render() == src
671
+
672
+
673
+ def test_triple_quote_roundtrip_preserves_delimiter_style():
674
+ """The renderer must wrap the body in the same delimiter the input used."""
675
+ src_sq = "k = '''\nbody\n'''\n"
676
+ src_dq = 'k = """\nbody\n"""\n'
677
+ assert nmhit.parse_text(src_sq).render() == src_sq
678
+ assert nmhit.parse_text(src_dq).render() == src_dq
@@ -16,8 +16,8 @@ typedef nmhit_detail::ParseDriver HIT_Driver;
16
16
  return nmhit_detail::Parser::token::t; } while(0)
17
17
  #define TOK_UNQUOTED do { DRIVER->flush_unquoted(); \
18
18
  return nmhit_detail::Parser::token::TOK_UNQUOTED_STR; } while(0)
19
- #define TOK_TRIPLE do { \
20
- DRIVER->set_verbatim_pending(DRIVER->_triple_str.c_str(), (int)DRIVER->_triple_str.size()); \
19
+ #define TOK_TRIPLE(delim) do { \
20
+ DRIVER->set_verbatim_pending(DRIVER->_triple_str.c_str(), (int)DRIVER->_triple_str.size(), delim); \
21
21
  return nmhit_detail::Parser::token::TOK_UNQUOTED_STR; \
22
22
  } while(0)
23
23
  %}
@@ -76,12 +76,12 @@ COMMENT #[^\n]*
76
76
  <AFTER_EQ>[^ \t\n\r\[#$'"\\]+ { DRIVER->begin_unquoted(yytext, yyleng); BEGIN(IN_UNQUOTED); }
77
77
  <AFTER_EQ>. { DRIVER->lex_error(yytext, yylineno); BEGIN(SECTION_BODY); }
78
78
 
79
- <IN_TRIPLE_SQ>"'''" { BEGIN(SECTION_BODY); TOK_TRIPLE; }
79
+ <IN_TRIPLE_SQ>"'''" { BEGIN(SECTION_BODY); TOK_TRIPLE('\''); }
80
80
  <IN_TRIPLE_SQ>"''" { DRIVER->_triple_str += yytext; }
81
81
  <IN_TRIPLE_SQ>"'" { DRIVER->_triple_str += yytext; }
82
82
  <IN_TRIPLE_SQ>[^']+ { DRIVER->_triple_str += yytext; }
83
83
 
84
- <IN_TRIPLE_DQ>"\"\"\"" { BEGIN(SECTION_BODY); TOK_TRIPLE; }
84
+ <IN_TRIPLE_DQ>"\"\"\"" { BEGIN(SECTION_BODY); TOK_TRIPLE('"'); }
85
85
  <IN_TRIPLE_DQ>"\"\"" { DRIVER->_triple_str += yytext; }
86
86
  <IN_TRIPLE_DQ>"\"" { DRIVER->_triple_str += yytext; }
87
87
  <IN_TRIPLE_DQ>[^\"]+ { DRIVER->_triple_str += yytext; }
@@ -524,8 +524,11 @@ Section::clone() const
524
524
  // Field
525
525
  // ═══════════════════════════════════════════════════════════════════════════════
526
526
 
527
- Field::Field(const std::string & name, const std::string & raw_value, bool verbatim)
528
- : _name(name), _raw(raw_value), _verbatim(verbatim)
527
+ Field::Field(const std::string & name,
528
+ const std::string & raw_value,
529
+ bool verbatim,
530
+ char verbatim_delim)
531
+ : _name(name), _raw(raw_value), _verbatim(verbatim), _verbatim_delim(verbatim_delim)
529
532
  {}
530
533
 
531
534
  void
@@ -540,13 +543,22 @@ Field::render(int indent, const std::string & indent_text) const
540
543
  std::string pfx;
541
544
  for (int i = 0; i < indent; ++i)
542
545
  pfx += indent_text;
546
+ if (_verbatim)
547
+ {
548
+ // Wrap the body back in its original triple-quote delimiter so round-
549
+ // tripping is byte-exact. Body cannot contain three consecutive copies
550
+ // of the delimiter (the lexer would have closed the string), so the
551
+ // surrounding triple-quote is always a valid wrapping.
552
+ std::string triple(3, _verbatim_delim);
553
+ return pfx + _name + " = " + triple + _raw + triple + "\n";
554
+ }
543
555
  return pfx + _name + " = " + _raw + "\n";
544
556
  }
545
557
 
546
558
  std::unique_ptr<Node>
547
559
  Field::clone() const
548
560
  {
549
- auto f = std::make_unique<Field>(_name, _raw);
561
+ auto f = std::make_unique<Field>(_name, _raw, _verbatim, _verbatim_delim);
550
562
  f->_set_location(filename(), line(), column());
551
563
  return f;
552
564
  }
@@ -937,8 +949,11 @@ ParseDriver::build_field(const std::string & name,
937
949
  segs.push_back(name);
938
950
 
939
951
  bool verbatim = _next_field_verbatim;
952
+ char verbatim_delim = _next_field_verbatim_delim;
940
953
  _next_field_verbatim = false;
941
- auto field = std::make_unique<nmhit::Field>(segs.back(), raw_value, verbatim);
954
+ _next_field_verbatim_delim = '\'';
955
+ auto field =
956
+ std::make_unique<nmhit::Field>(segs.back(), raw_value, verbatim, verbatim_delim);
942
957
  field->_set_location(_fname, loc.begin.line, loc.begin.column);
943
958
  if (is_override)
944
959
  _override_fields.insert(field.get());
@@ -51,11 +51,14 @@ public:
51
51
 
52
52
  /// Like set_pending, but also marks the next field as verbatim (triple-quoted).
53
53
  /// Verbatim fields can only be retrieved as std::string via param_str().
54
- void set_verbatim_pending(const char * text, int len)
54
+ /// `delim` records which character ('\'' or '"') the triple-quote used so
55
+ /// the renderer can wrap the body in the same delimiter on output.
56
+ void set_verbatim_pending(const char * text, int len, char delim)
55
57
  {
56
58
  _pending.assign(text, len);
57
59
  _has_pending = true;
58
60
  _next_field_verbatim = true;
61
+ _next_field_verbatim_delim = delim;
59
62
  }
60
63
 
61
64
  /// Report a lexer error and mark the parse as failed.
@@ -139,6 +142,9 @@ private:
139
142
  // True when the pending value came from a triple-quoted verbatim string.
140
143
  // Consumed and reset in build_field(); Field stores the flag permanently.
141
144
  bool _next_field_verbatim = false;
145
+ // Which character ('\'' or '"') wrapped the triple-quoted value; used by
146
+ // Field::render to round-trip with the same delimiter style.
147
+ char _next_field_verbatim_delim = '\'';
142
148
 
143
149
  // Brace expression nesting counter
144
150
  int _brace_depth = 0;
@@ -298,6 +298,53 @@ main()
298
298
  EXPECT(rendered.find("# My comment") != std::string::npos);
299
299
  });
300
300
 
301
+ // Byte-exact round-trip for triple-quoted (verbatim) fields. The formatter
302
+ // (nmhit-format) walks this exact pipeline; before this was fixed, render
303
+ // emitted `name = <body>` and dropped the `'''...'''` delimiters entirely.
304
+
305
+ run("round_trip_triple_quoted_inline_single", []() {
306
+ std::string input = "k = '''hello world'''\n";
307
+ EXPECT(p(input)->render() == input);
308
+ });
309
+
310
+ run("round_trip_triple_quoted_inline_double", []() {
311
+ std::string input = "k = \"\"\"hello world\"\"\"\n";
312
+ EXPECT(p(input)->render() == input);
313
+ });
314
+
315
+ run("round_trip_triple_quoted_multiline", []() {
316
+ std::string input = "k = '''\n line one\n line two\n'''\n";
317
+ EXPECT(p(input)->render() == input);
318
+ });
319
+
320
+ run("round_trip_triple_quoted_python_expr", []() {
321
+ // The motivating use case: a Python expression for a [Tensors] block
322
+ // with intentional indentation. The leading newline + indented body +
323
+ // trailing newline must all round-trip byte-for-byte.
324
+ std::string input =
325
+ "[Tensors]\n"
326
+ " [strain]\n"
327
+ " type = Python\n"
328
+ " expr = '''\n"
329
+ "torch.stack([\n"
330
+ " torch.linspace(0, 1, 5),\n"
331
+ " torch.linspace(1, 2, 5),\n"
332
+ "])\n"
333
+ "'''\n"
334
+ " []\n"
335
+ "[]\n";
336
+ EXPECT(p(input)->render() == input);
337
+ });
338
+
339
+ run("round_trip_triple_quoted_delimiter_preserved", []() {
340
+ // The renderer must wrap the body in the same delimiter character the
341
+ // input used — '''...''' stays single-triple, """...""" stays double.
342
+ std::string sq = "k = '''\nbody\n'''\n";
343
+ std::string dq = "k = \"\"\"\nbody\n\"\"\"\n";
344
+ EXPECT(p(sq)->render() == sq);
345
+ EXPECT(p(dq)->render() == dq);
346
+ });
347
+
301
348
  // ── 7. Override assignment ────────────────────────────────────────────────
302
349
 
303
350
  run("override_assign", []() {
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes